From 2c9804e2842e7d21fcf1f4761d5f530af5f93dfd Mon Sep 17 00:00:00 2001 From: Angel Francisco Orta Date: Wed, 8 Jan 2020 22:30:19 +0100 Subject: [PATCH 01/38] CNAM-432: Refactoring in drugs and followup test classes, not used methods removed and finish implementation in DrugExtractor CNAM-432: Refactoring tests CNAM-432: Solved requests from PR CNAM-432: Object names and unused elements resolved CNAM-432: Solved problem with ambiguity --- .../cmap/cnam/etl/events/Drug.scala | 19 +- .../etl/extractors/drugs/DrugExtractor.scala | 50 +- .../etl/transformers/follow_up/Columns.scala | 4 - .../follow_up/FollowUpTransformer.scala | 24 +- .../FollowUpTransformerUtilities.scala | 63 +- .../cmap/cnam/etl/events/DrugSuite.scala | 21 +- .../drugs/DrugsExtractorSuite.scala | 622 +++++++++++++++--- .../exposures/ExposurePeriodAdderSuite.scala | 87 +-- .../exposures/ExposureTransformerSuite.scala | 10 +- .../follow_up/FollowUpTransformerSuite.scala | 34 +- 10 files changed, 681 insertions(+), 253 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala index fad54865..b26e629b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala @@ -11,21 +11,6 @@ trait Drug extends Dispensation with EventBuilder { override val category: EventCategory[Drug] = "drug" - def fromRow( - r: Row, - patientIDCol: String = "patientID", - nameCol: String = "name", - dosageCol: String = "dosage", - dateCol: String = "eventDate"): Event[Drug] = { - - Drug( - r.getAs[String](patientIDCol), - r.getAs[String](nameCol), - r.getAs[Double](dosageCol), - r.getAs[Timestamp](dateCol) - ) - } - - def apply(patientID: String, name: String, dosage: Double, date: Timestamp): Event[Drug] = - Event(patientID, category, groupID = "NA", name, dosage, date, None) + def apply(patientID: String, name: String, dosage: Double, groupID: 
String, date: Timestamp): Event[Drug] = + Event(patientID, category, groupID, name, dosage, date, None) } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala index a63a3b74..b4362537 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala @@ -31,25 +31,31 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { input.filter(isInExtractorScope _).filter(isInStudy(codes) _) } }.flatMap(builder _).distinct() - } - - def extractGroupId(r: Row): String = { - Base64.encodeBase64(s"${r.getAs[String](ColNames.FluxDate)}_${r.getAs[String](ColNames.FluxProcessingDate)}_${ - r.getAs[String]( - ColNames - .EmitterType - ) - }_${r.getAs[String](ColNames.EmitterId)}_${r.getAs[String](ColNames.FluxSeqNumber)}_${ - r.getAs[String]( - ColNames - .OrganisationOldId - ) - }_${r.getAs[String](ColNames.OrganisationDecompteNumber)}".getBytes()).map(_.toChar).mkString - - + /** It generate a hash using the values of these columns + *(FLX_DIS_DTD,FLX_TRT_DTD,FLX_EMT_TYP,FLX_EMT_NUM,FLX_EMT_ORD,ORG_CLE_NUM,DCT_ORD_NUM). + * It allows to identify each prescription in a unique way, it can be used to identify + * the possible interactions of molecules prescript in the same period. 
+ * + * @param r The Row object itself + * @return A hash Id unique in a string format + */ + def extractGroupId(r: Row): String = { + Base64.encodeBase64( + s"${r.getAs[String](ColNames.FluxDate)}_${r.getAs[String](ColNames.FluxProcessingDate)}_${ + r.getAs[String]( + ColNames + .EmitterType + ) + }_${r.getAs[String](ColNames.EmitterId)}_${r.getAs[String](ColNames.FluxSeqNumber)}_${ + r.getAs[String]( + ColNames + .OrganisationOldId + ) + }_${r.getAs[String](ColNames.OrganisationDecompteNumber)}".getBytes() + ).map(_.toChar).mkString } @@ -64,8 +70,9 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { lazy val patientID = getPatientID(row) lazy val conditioning = getConditioning(row) lazy val date = getEventDate(row) + lazy val groupID = extractGroupId(row) - classification.map(code => Drug(patientID, code, conditioning, date)) + classification.map(code => Drug(patientID, code, conditioning, groupID, date)) } private def getPatientID(row: Row): String = row.getAs[String](ColNames.PatientId) @@ -75,6 +82,7 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { private def getEventDate(row: Row): Timestamp = row.getAs[Timestamp](ColNames.Date) override def getInput(sources: Sources): DataFrame = { + val neededColumns: List[Column] = List( col("NUM_ENQ").cast(StringType).as("patientID"), col("ER_PHA_F__PHA_PRS_C13").cast(StringType).as("CIP13"), @@ -82,7 +90,7 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { col("EXE_SOI_DTD").cast(TimestampType).as("eventDate"), col("molecule_combination").cast(StringType).as("molecules"), col("PHA_CND_TOP").cast(StringType).as("conditioning") - ) + ) ::: ColNames.GroupID.map(col) lazy val irPhaR = sources.irPha.get lazy val dcir = sources.dcir.get @@ -96,7 +104,7 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { .na.drop(Seq("eventDate", "CIP13", "ATC5")) } - final object ColNames { + final object ColNames extends Serializable { val PatientId = 
"patientID" val Conditioning = "conditioning" val Date = "eventDate" @@ -109,6 +117,10 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { lazy val FluxSeqNumber = "FLX_EMT_ORD" lazy val OrganisationOldId = "ORG_CLE_NUM" lazy val OrganisationDecompteNumber = "DCT_ORD_NUM" + + lazy val GroupID = List( + FluxDate, FluxProcessingDate, EmitterType, EmitterId, FluxSeqNumber, OrganisationOldId, OrganisationDecompteNumber + ) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/Columns.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/Columns.scala index 882a8d9a..8fc941d4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/Columns.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/Columns.scala @@ -27,8 +27,4 @@ private[follow_up] object Columns { final val TracklossDate = "trackloss" final val FirstTargetDiseaseDate = "firstTargetDisease" - object EndReasons extends Enumeration { - val Death, Disease, Trackloss, ObservationEnd = Value - } - } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala index e7f8da14..bf217289 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala @@ -40,14 +40,14 @@ class FollowUpTransformer(config: FollowUpTransformerConfig) { .agg( min(DeathDate).as(DeathDate), min(FollowUpStart).as(FollowUpStart), - min(ObservationEnd).as(ObservationEnd) + min(Columns.ObservationEnd).as(Columns.ObservationEnd) ) .map( e => PatientDates( e.getAs[String](PatientID), Option(e.getAs[Timestamp](DeathDate)), Option(e.getAs[Timestamp](FollowUpStart)), - Option(e.getAs[Timestamp](ObservationEnd)) + 
Option(e.getAs[Timestamp](Columns.ObservationEnd)) ) ) @@ -61,29 +61,17 @@ class FollowUpTransformer(config: FollowUpTransformerConfig) { ) .map(e => TrackLossDate(e.getAs[String](PatientID), Option(e.getAs[Timestamp](TracklossDate)))) - val disease = config.outcomeName.getOrElse(None).toString - - val outcomesDisease: Dataset[Event[Outcome]] = outcomes - .filter(e => e.value.matches(s".*$disease.*")) - .groupBy(col(PatientID)) - .agg( - min(Start).as(Start) - ).map(e => Outcome(e.getAs[String](PatientID), disease, e.getAs[Timestamp](Start))) - patientDates .joinWith(tracklossDates, tracklossDates.col(PatientID) === patientDates.col(PatientID), "left_outer") - .joinWith(outcomesDisease, col(PatientID) === col(s"_1.$PatientID"), "left_outer") .map { e => - val trackloss: Option[Timestamp] = Try(e._1._2.trackloss).getOrElse(None) - val disease: Option[Timestamp] = Try(Option(e._2.start)).getOrElse(None) + val trackloss: Option[Timestamp] = Try(e._2.trackloss).getOrElse(None) val followUpEndReason = endReason( - DeathReason(date = e._1._1.deathDate), - DiseaseReason(date = disease), + DeathReason(date = e._1.deathDate), TrackLossReason(date = trackloss), - ObservationEndReason(date = e._1._1.observationEnd) + ObservationEndReason(date = e._1.observationEnd) ) - FollowUp(e._1._1.patientID, followUpEndReason.reason, e._1._1.followUpStart.get, followUpEndReason.date.get) + FollowUp(e._1.patientID, followUpEndReason.reason, e._1.followUpStart.get, followUpEndReason.date.get) }.filter(e => e.end.nonEmpty) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerUtilities.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerUtilities.scala index 143c4145..9425f685 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerUtilities.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerUtilities.scala 
@@ -1,39 +1,61 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.transformers.follow_up import java.sql.Timestamp -import fr.polytechnique.cmap.cnam.etl.transformers.follow_up.Columns.EndReasons import fr.polytechnique.cmap.cnam.util.datetime.implicits.addMonthsToRichTimestamp - object FollowUpTransformerUtilities { - case class PatientDates( patientID: String, deathDate: Option[Timestamp], followUpStart: Option[Timestamp], observationEnd: Option[Timestamp]) - case class TrackLossDate( patientID: String, trackloss: Option[Timestamp]) case class FollowUpEnd(reason: String, date: Option[Timestamp]) - abstract sealed class FollowUpEndReason { - val reason: String + + sealed trait EndReason extends Enumeration { + val Death, Trackloss, ObservationEnd = Value + + val endReason: String + } + + case object Death extends EndReason { + val endReason = Death + .toString + + } + + case object Trackloss extends EndReason { + val endReason = Trackloss + .toString + + } + + case object ObservationEnd extends EndReason { + val endReason = ObservationEnd + .toString + + } + + + abstract sealed class FollowUpEndReason(val endReason: EndReason) { val date: Option[Timestamp] def compare(that: FollowUpEndReason): Int = { (this.date.get compareTo that.date.get) match { - case 0 => (this.reason, that.reason) match { + case 0 => (this.endReason.endReason, that.endReason.endReason) match { case ("Death", _) => 1 case (_, "Death") => -1 - case ("Disease", "ObservationEnd") => 1 - case ("ObservationEnd", "Disease") => -1 - case (_, _) => 1 + case ("Trackloss", _) => 1 + case (_, "Trackloss") => -1 } case c => c } @@ -48,25 +70,16 @@ object FollowUpTransformerUtilities { def compare(x: Timestamp, y: Timestamp): Int = x compareTo y } - } case class DeathReason( - reason: String = EndReasons.Death.toString, - date: Option[Timestamp]) extends FollowUpEndReason with Ordered[FollowUpEndReason] - - case class DiseaseReason( - reason: String = EndReasons.Disease.toString, - 
date: Option[Timestamp]) extends FollowUpEndReason with Ordered[FollowUpEndReason] + date: Option[Timestamp]) extends FollowUpEndReason(Death) with Ordered[FollowUpEndReason] case class TrackLossReason( - reason: String = EndReasons.Trackloss.toString, - date: Option[Timestamp]) extends FollowUpEndReason with Ordered[FollowUpEndReason] + date: Option[Timestamp]) extends FollowUpEndReason(Trackloss) with Ordered[FollowUpEndReason] case class ObservationEndReason( - reason: String = EndReasons.ObservationEnd.toString, - date: Option[Timestamp]) extends FollowUpEndReason with Ordered[FollowUpEndReason] - + date: Option[Timestamp]) extends FollowUpEndReason(ObservationEnd) with Ordered[FollowUpEndReason] val correctedStart: (Timestamp, Option[Timestamp], Int) => Option[Timestamp] = (start: Timestamp, end: Option[Timestamp], delayMonths: Int) => { @@ -82,12 +95,10 @@ object FollowUpTransformerUtilities { def endReason( death: DeathReason, - disease: DiseaseReason, trackloss: TrackLossReason, observation: ObservationEndReason): FollowUpEnd = { - val followUpEndReason = Seq(death, disease, trackloss, observation).filter(e => e.date.nonEmpty).min - FollowUpEnd(followUpEndReason.reason, followUpEndReason.date) + val followUpEndReason = Seq(death, trackloss, observation).filter(e => e.date.nonEmpty).min + FollowUpEnd(followUpEndReason.endReason.endReason, followUpEndReason.date) } - } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/DrugSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/DrugSuite.scala index d1bfe88c..02a8e545 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/DrugSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/DrugSuite.scala @@ -19,30 +19,11 @@ class DrugSuite extends SharedContext { ) // When - val result = Drug(patientID, "Drug1", 0.1, timestamp) + val result = Drug(patientID, "Drug1", 0.1, "NA", timestamp) // Then assert(result == expected) } - "fromRow" should "create extract 
corresponding column values and create Drug event correctly" in { - - // Given - val sqlCtx = sqlContext - import sqlCtx.implicits._ - import util.functions.makeTS - - val inputDF = Seq( - ("patientId", "drugName", 0.1, makeTS(2014, 5, 5)) - ).toDF("pId", "dname", "weigh", "eventDate") - - val expected = Drug("patientId", "drugName", 0.1, makeTS(2014, 5, 5)) - - // When - val result = Drug.fromRow(inputDF.first, "pId", "dname", "weigh") - - // Then - assert(result == expected) - } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala index 4a035fc3..21d51f13 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala @@ -23,19 +23,90 @@ class DrugsExtractorSuite extends SharedContext { import sqlCtx.implicits._ val inputDF = Seq( - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400935183644"), Some(makeTS(2014, 6, 1))), - ("patient3", Some("3400935418487"), Some(makeTS(2014, 7, 1))), - ("patient4", Some("3400935183644"), Some(makeTS(2014, 8, 1))), - ("patient8", Some("3400936889651"), Some(makeTS(2014, 9, 1))) - ).toDF("NUM_ENQ", "ER_PHA_F__PHA_PRS_C13", "EXE_SOI_DTD") + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400935183644"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759"), + ("patient3", Some("3400935418487"), Some( + makeTS( + 2014, + 7, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673400", "1949"), + ("patient4", Some("3400935183644"), Some( + makeTS( + 2014, + 8, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673500", "1733"), + 
("patient8", Some("3400936889651"), Some( + makeTS( + 2014, + 9, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673700", "1199") + ).toDF( + "NUM_ENQ", + "ER_PHA_F__PHA_PRS_C13", + "EXE_SOI_DTD", + "FLX_DIS_DTD", + "FLX_TRT_DTD", + "FLX_EMT_TYP", + "FLX_EMT_NUM", + "FLX_EMT_ORD", + "ORG_CLE_NUM", + "DCT_ORD_NUM" + ) val expected: Dataset[Event[Drug]] = Seq( - Drug("patient1", "9111111111111", 1, makeTS(2014, 5, 1)), - Drug("patient2", "3400935183644", 1, makeTS(2014, 6, 1)), - Drug("patient3", "3400935418487", 1, makeTS(2014, 7, 1)), - Drug("patient4", "3400935183644", 1, makeTS(2014, 8, 1)), - Drug("patient8", "3400936889651", 1, makeTS(2014, 9, 1)) + Drug( + "patient1", + "9111111111111", + 1, + "MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", + makeTS(2014, 5, 1) + ), + Drug( + "patient2", + "3400935183644", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ), + Drug( + "patient3", + "3400935418487", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM0MDBfMTk0OQ==", + makeTS(2014, 7, 1) + ), + Drug( + "patient4", + "3400935183644", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM1MDBfMTczMw==", + makeTS(2014, 8, 1) + ), + Drug( + "patient8", + "3400936889651", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM3MDBfMTE5OQ==", + makeTS(2014, 9, 1) + ) ).toDS val source = new Sources( @@ -51,9 +122,8 @@ class DrugsExtractorSuite extends SharedContext { ) val drugConf = DrugConfig(Cip13Level, List.empty) - // When - val result = new DrugExtractor(drugConf).extract(source, Set.empty) + val result: Dataset[Event[Drug]] = new DrugExtractor(drugConf).extract(source, Set.empty) // Then assertDSs(result, expected) @@ -66,19 +136,78 @@ class DrugsExtractorSuite extends SharedContext { import sqlCtx.implicits._ val inputDF = Seq( - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400935183644"), Some(makeTS(2014, 6, 1))), - ("patient3", 
Some("3400935418487"), Some(makeTS(2014, 7, 1))), - ("patient4", Some("3400935183644"), Some(makeTS(2014, 8, 1))), - ("patient5", Some("3400936889651"), None), - ("patient6", None, Some(makeTS(2014, 9, 1))), - ("patient8", Some("3400936889651"), Some(makeTS(2014, 9, 1))) - ).toDF("NUM_ENQ", "ER_PHA_F__PHA_PRS_C13", "EXE_SOI_DTD") + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400935183644"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759"), + ("patient3", Some("3400935418487"), Some( + makeTS( + 2014, + 7, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673400", "1949"), + ("patient4", Some("3400935183644"), Some( + makeTS( + 2014, + 8, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673500", "1733"), + ("patient5", Some("3400936889651"), None, "2014-08-01", "2014-07-17", "1", "17", "0", "01C673700", "1199"), + ("patient6", None, Some(makeTS(2014, 9, 1)), "2014-08-01", "2014-07-11", "1", "17", "0", "01C673700", "1399"), + ("patient8", Some("3400936889651"), Some( + makeTS( + 2014, + 9, + 1 + ) + ), "2014-08-01", "2014-07-12", "1", "17", "0", "01C673700", "1699") + ).toDF( + "NUM_ENQ", + "ER_PHA_F__PHA_PRS_C13", + "EXE_SOI_DTD", + "FLX_DIS_DTD", + "FLX_TRT_DTD", + "FLX_EMT_TYP", + "FLX_EMT_NUM", + "FLX_EMT_ORD", + "ORG_CLE_NUM", + "DCT_ORD_NUM" + ) val expected: Dataset[Event[Drug]] = Seq( - Drug("patient2", "Antidepresseurs", 1, makeTS(2014, 6, 1)), - Drug("patient4", "Antidepresseurs", 1, makeTS(2014, 8, 1)), - Drug("patient8", "Antidepresseurs", 1, makeTS(2014, 9, 1)) + Drug( + "patient2", + "Antidepresseurs", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ), + Drug( + "patient4", + "Antidepresseurs", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM1MDBfMTczMw==", + makeTS(2014, 8, 1) + ), + Drug( + 
"patient8", + "Antidepresseurs", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTEyXzFfMTdfMF8wMUM2NzM3MDBfMTY5OQ==", + makeTS(2014, 9, 1) + ) ).toDS val source = new Sources( @@ -109,13 +238,48 @@ class DrugsExtractorSuite extends SharedContext { import sqlCtx.implicits._ val inputDF = Seq( - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400930023648"), Some(makeTS(2014, 6, 1))), - ("patient3", Some("3400935183644"), Some(makeTS(2014, 7, 1))) - ).toDF("NUM_ENQ", "ER_PHA_F__PHA_PRS_C13", "EXE_SOI_DTD") + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400930023648"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759"), + ("patient3", Some("3400935183644"), Some( + makeTS( + 2014, + 7, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673400", "1949") + ).toDF( + "NUM_ENQ", + "ER_PHA_F__PHA_PRS_C13", + "EXE_SOI_DTD", + "FLX_DIS_DTD", + "FLX_TRT_DTD", + "FLX_EMT_TYP", + "FLX_EMT_NUM", + "FLX_EMT_ORD", + "ORG_CLE_NUM", + "DCT_ORD_NUM" + ) val expected: Dataset[Event[Drug]] = Seq( - Drug("patient2", "Neuroleptiques", 2, makeTS(2014, 6, 1)) + Drug( + "patient2", + "Neuroleptiques", + 2, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ) ).toDS val source = new Sources( @@ -144,13 +308,48 @@ class DrugsExtractorSuite extends SharedContext { import sqlCtx.implicits._ val inputDF = Seq( - ("patient1", Some("3400930081143"), Some(makeTS(2014, 6, 1))), - ("patient2", Some("3400936099777"), Some(makeTS(2014, 7, 1))) - ).toDF("NUM_ENQ", "ER_PHA_F__PHA_PRS_C13", "EXE_SOI_DTD") + ("patient1", Some("3400930081143"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400936099777"), Some( + makeTS( + 2014, + 7, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", 
"17", "0", "01C673200", "1759") + ).toDF( + "NUM_ENQ", + "ER_PHA_F__PHA_PRS_C13", + "EXE_SOI_DTD", + "FLX_DIS_DTD", + "FLX_TRT_DTD", + "FLX_EMT_TYP", + "FLX_EMT_NUM", + "FLX_EMT_ORD", + "ORG_CLE_NUM", + "DCT_ORD_NUM" + ) val expected: Dataset[Event[Drug]] = Seq( - Drug("patient1", "Hypnotiques", 2, makeTS(2014, 6, 1)), - Drug("patient2", "Hypnotiques", 1, makeTS(2014, 7, 1)) + Drug( + "patient1", + "Hypnotiques", + 2, + "MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", + makeTS(2014, 6, 1) + ), + Drug( + "patient2", + "Hypnotiques", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 7, 1) + ) ).toDS val source = new Sources( @@ -178,12 +377,41 @@ class DrugsExtractorSuite extends SharedContext { import sqlCtx.implicits._ val inputDF = Seq( - ("patient1", Some("3400937354004"), Some(makeTS(2014, 6, 1))), - ("patient2", Some("3400936099777"), Some(makeTS(2014, 7, 1))) - ).toDF("NUM_ENQ", "ER_PHA_F__PHA_PRS_C13", "EXE_SOI_DTD") + ("patient1", Some("3400937354004"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400936099777"), Some( + makeTS( + 2014, + 7, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759") + ).toDF( + "NUM_ENQ", + "ER_PHA_F__PHA_PRS_C13", + "EXE_SOI_DTD", + "FLX_DIS_DTD", + "FLX_TRT_DTD", + "FLX_EMT_TYP", + "FLX_EMT_NUM", + "FLX_EMT_ORD", + "ORG_CLE_NUM", + "DCT_ORD_NUM" + ) val expected: Dataset[Event[Drug]] = Seq( - Drug("patient1", "Antihypertenseurs", 1, makeTS(2014, 6, 1)) + Drug( + "patient1", + "Antihypertenseurs", + 1, + "MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", + makeTS(2014, 6, 1) + ) ).toDS @@ -213,22 +441,99 @@ class DrugsExtractorSuite extends SharedContext { import sqlCtx.implicits._ val inputDF = Seq( - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400935183644"), Some(makeTS(2014, 6, 1))), - ("patient3", 
Some("3400935418487"), Some(makeTS(2014, 7, 1))), - ("patient4", Some("3400935183644"), Some(makeTS(2014, 8, 1))), - ("patient5", Some("3400936889651"), None), - ("patient6", None, Some(makeTS(2014, 9, 1))), - ("patient8", Some("3400936889651"), Some(makeTS(2014, 9, 1))), - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400930023648"), Some(makeTS(2014, 6, 1))) - ).toDF("NUM_ENQ", "ER_PHA_F__PHA_PRS_C13", "EXE_SOI_DTD") + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400935183644"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759"), + ("patient3", Some("3400935418487"), Some( + makeTS( + 2014, + 7, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673400", "1949"), + ("patient4", Some("3400935183644"), Some( + makeTS( + 2014, + 8, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673500", "1733"), + ("patient5", Some("3400936889651"), None, "2014-08-01", "2014-07-17", "1", "17", "0", "01C673700", "1199"), + ("patient6", None, Some(makeTS(2014, 9, 1)), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1959"), + ("patient8", Some("3400936889651"), Some( + makeTS( + 2014, + 9, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "2749"), + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400930023648"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759") + ).toDF( + "NUM_ENQ", + "ER_PHA_F__PHA_PRS_C13", + "EXE_SOI_DTD", + "FLX_DIS_DTD", + "FLX_TRT_DTD", + "FLX_EMT_TYP", + "FLX_EMT_NUM", + "FLX_EMT_ORD", + "ORG_CLE_NUM", + "DCT_ORD_NUM" + ) val expected: Dataset[Event[Drug]] = Seq( - Drug("patient2", "Antidepresseurs", 1, makeTS(2014, 6, 1)), 
- Drug("patient4", "Antidepresseurs", 1, makeTS(2014, 8, 1)), - Drug("patient8", "Antidepresseurs", 1, makeTS(2014, 9, 1)), - Drug("patient2", "Neuroleptiques", 1, makeTS(2014, 6, 1)) + Drug( + "patient2", + "Antidepresseurs", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ), + Drug( + "patient4", + "Antidepresseurs", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM1MDBfMTczMw==", + makeTS(2014, 8, 1) + ), + Drug( + "patient8", + "Antidepresseurs", + 1, + "MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMjc0OQ==", + makeTS(2014, 9, 1) + ), + Drug( + "patient2", + "Neuroleptiques", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ) ).toDS val source = new Sources( @@ -249,7 +554,6 @@ class DrugsExtractorSuite extends SharedContext { val drugConf = DrugConfig(TherapeuticLevel, List(drugConfigAntidepresseurs, drugConfigNeuroleptiques)) // When val result = new DrugExtractor(drugConf).extract(source, Set.empty) - // Then assertDSs(result, expected) } @@ -261,22 +565,99 @@ class DrugsExtractorSuite extends SharedContext { import sqlCtx.implicits._ val inputDF = Seq( - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400935183644"), Some(makeTS(2014, 6, 1))), - ("patient3", Some("3400935418487"), Some(makeTS(2014, 7, 1))), - ("patient4", Some("3400935183644"), Some(makeTS(2014, 8, 1))), - ("patient5", Some("3400936889651"), None), - ("patient6", None, Some(makeTS(2014, 9, 1))), - ("patient8", Some("3400936889651"), Some(makeTS(2014, 9, 1))), - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400930023648"), Some(makeTS(2014, 6, 1))) - ).toDF("NUM_ENQ", "ER_PHA_F__PHA_PRS_C13", "EXE_SOI_DTD") + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400935183644"), Some( + makeTS( + 2014, + 
6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759"), + ("patient3", Some("3400935418487"), Some( + makeTS( + 2014, + 7, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673400", "1949"), + ("patient4", Some("3400935183644"), Some( + makeTS( + 2014, + 8, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673500", "1733"), + ("patient5", Some("3400936889651"), None, "2014-08-01", "2014-07-17", "1", "17", "0", "01C673700", "1199"), + ("patient6", None, Some(makeTS(2014, 9, 1)), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1959"), + ("patient8", Some("3400936889651"), Some( + makeTS( + 2014, + 9, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "2749"), + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400930023648"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759") + ).toDF( + "NUM_ENQ", + "ER_PHA_F__PHA_PRS_C13", + "EXE_SOI_DTD", + "FLX_DIS_DTD", + "FLX_TRT_DTD", + "FLX_EMT_TYP", + "FLX_EMT_NUM", + "FLX_EMT_ORD", + "ORG_CLE_NUM", + "DCT_ORD_NUM" + ) val expected: Dataset[Event[Drug]] = Seq( - Drug("patient2", "Antidepresseurs_Tricycliques", 1, makeTS(2014, 6, 1)), - Drug("patient4", "Antidepresseurs_Tricycliques", 1, makeTS(2014, 8, 1)), - Drug("patient8", "Antidepresseurs_ISRS", 1, makeTS(2014, 9, 1)), - Drug("patient2", "Neuroleptiques_Autres_neuroleptiques", 1, makeTS(2014, 6, 1)) + Drug( + "patient2", + "Antidepresseurs_Tricycliques", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ), + Drug( + "patient4", + "Antidepresseurs_Tricycliques", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM1MDBfMTczMw==", + makeTS(2014, 8, 1) + ), + Drug( + "patient8", + "Antidepresseurs_ISRS", + 1, + "MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMjc0OQ==", + 
makeTS(2014, 9, 1) + ), + Drug( + "patient2", + "Neuroleptiques_Autres_neuroleptiques", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ) ).toDS val source = new Sources( @@ -309,22 +690,99 @@ class DrugsExtractorSuite extends SharedContext { import sqlCtx.implicits._ val inputDF = Seq( - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400935183644"), Some(makeTS(2014, 6, 1))), - ("patient3", Some("3400935418487"), Some(makeTS(2014, 7, 1))), - ("patient4", Some("3400935183644"), Some(makeTS(2014, 8, 1))), - ("patient5", Some("3400936889651"), None), - ("patient6", None, Some(makeTS(2014, 9, 1))), - ("patient8", Some("3400936889651"), Some(makeTS(2014, 9, 1))), - ("patient1", Some("9111111111111"), Some(makeTS(2014, 5, 1))), - ("patient2", Some("3400930023648"), Some(makeTS(2014, 6, 1))) - ).toDF("NUM_ENQ", "ER_PHA_F__PHA_PRS_C13", "EXE_SOI_DTD") + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400935183644"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759"), + ("patient3", Some("3400935418487"), Some( + makeTS( + 2014, + 7, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673400", "1949"), + ("patient4", Some("3400935183644"), Some( + makeTS( + 2014, + 8, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673500", "1733"), + ("patient5", Some("3400936889651"), None, "2014-08-01", "2014-07-17", "1", "17", "0", "01C673700", "1199"), + ("patient6", None, Some(makeTS(2014, 9, 1)), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "1959"), + ("patient8", Some("3400936889651"), Some( + makeTS( + 2014, + 9, + 1 + ) + ), "2014-09-01", "2014-07-17", "1", "17", "0", "01C673100", "2749"), + ("patient1", Some("9111111111111"), Some( + makeTS( + 2014, + 5, + 1 + ) + ), "2014-09-01", 
"2014-07-17", "1", "17", "0", "01C673100", "1749"), + ("patient2", Some("3400930023648"), Some( + makeTS( + 2014, + 6, + 1 + ) + ), "2014-08-01", "2014-07-17", "1", "17", "0", "01C673200", "1759") + ).toDF( + "NUM_ENQ", + "ER_PHA_F__PHA_PRS_C13", + "EXE_SOI_DTD", + "FLX_DIS_DTD", + "FLX_TRT_DTD", + "FLX_EMT_TYP", + "FLX_EMT_NUM", + "FLX_EMT_ORD", + "ORG_CLE_NUM", + "DCT_ORD_NUM" + ) val expected: Dataset[Event[Drug]] = Seq( - Drug("patient2", "N06AA04", 1, makeTS(2014, 6, 1)), - Drug("patient4", "N06AA04", 1, makeTS(2014, 8, 1)), - Drug("patient8", "DEXTROPROPOXYPHENE_PARACETAMOL_CAFEINE", 1, makeTS(2014, 9, 1)), - Drug("patient2", "INSULINE LISPRO (PROTAMINE)", 1, makeTS(2014, 6, 1)) + Drug( + "patient2", + "N06AA04", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ), + Drug( + "patient4", + "N06AA04", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM1MDBfMTczMw==", + makeTS(2014, 8, 1) + ), + Drug( + "patient8", + "DEXTROPROPOXYPHENE_PARACETAMOL_CAFEINE", + 1, + "MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMjc0OQ==", + makeTS(2014, 9, 1) + ), + Drug( + "patient2", + "INSULINE LISPRO (PROTAMINE)", + 1, + "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", + makeTS(2014, 6, 1) + ) ).toDS val irPha = Seq( @@ -373,4 +831,6 @@ class DrugsExtractorSuite extends SharedContext { // Then assert(result == expected) } + + } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposurePeriodAdderSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposurePeriodAdderSuite.scala index 883cfcc8..2ceb4f89 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposurePeriodAdderSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposurePeriodAdderSuite.scala @@ -8,7 +8,7 @@ import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event, 
Exposure, FollowUp} import fr.polytechnique.cmap.cnam.util.functions.makeTS -class ExposurePeriodAdderSuite extends SharedContext{ +class ExposurePeriodAdderSuite extends SharedContext { "toExposure" should "transform drugs to exposure based on the limited adder strategy" in { // Given val sqlCtx = sqlContext @@ -17,11 +17,11 @@ class ExposurePeriodAdderSuite extends SharedContext{ //Given val input: Dataset[Event[Drug]] = Seq( - Drug("patient", "Antidepresseurs", 2, makeTS(2014, 1, 8)), - Drug("patient", "Antidepresseurs", 2, makeTS(2014, 2, 5)), - Drug("patient", "Antidepresseurs", 2, makeTS(2014, 3, 12)), - Drug("patient", "Antidepresseurs", 2, makeTS(2014, 4, 20)), - Drug("patient", "Antidepresseurs", 2, makeTS(2014, 6, 3)) + Drug("patient", "Antidepresseurs", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 1, 8)), + Drug("patient", "Antidepresseurs", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 2, 5)), + Drug("patient", "Antidepresseurs", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 3, 12)), + Drug("patient", "Antidepresseurs", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 4, 20)), + Drug("patient", "Antidepresseurs", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 6, 3)) ).toDS val followUp: Dataset[Event[FollowUp]] = Seq( FollowUp("patient", "any_reason", makeTS(2006, 6, 1), makeTS(2020, 12, 31)), @@ -35,6 +35,7 @@ class ExposurePeriodAdderSuite extends SharedContext{ val exposureAdder = LimitedExposureAdder(0.days, 15.days, 90.days, 30.days, PurchaseCountBased) val result = exposureAdder.toExposure(followUp)(input) + assertDSs(result, expected) } @@ -46,13 +47,13 @@ class ExposurePeriodAdderSuite extends SharedContext{ //Given val input: Dataset[Event[Drug]] = Seq[Event[Drug]]( - Drug("Patient_A", "PIOGLITAZONE", 1, makeTS(2008, 1, 1)), - Drug("Patient_A", "PIOGLITAZONE", 1, makeTS(2008, 2, 1)), - 
Drug("Patient_A", "PIOGLITAZONE", 1, makeTS(2008, 9, 1)), - Drug("Patient_A", "SULFONYLUREA", 1, makeTS(2009, 3, 1)), - Drug("Patient_A", "SULFONYLUREA", 1, makeTS(2009, 10, 1)), - Drug("Patient_B", "PIOGLITAZONE", 1, makeTS(2009, 1, 1)), - Drug("Patient_B", "BENFLUOREX", 1, makeTS(2007, 1, 1)) + Drug("Patient_A", "PIOGLITAZONE", 1,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2008, 1, 1)), + Drug("Patient_A", "PIOGLITAZONE", 1,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2008, 2, 1)), + Drug("Patient_A", "PIOGLITAZONE", 1,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2008, 9, 1)), + Drug("Patient_A", "SULFONYLUREA", 1,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", makeTS(2009, 3, 1)), + Drug("Patient_A", "SULFONYLUREA", 1,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", makeTS(2009, 10, 1)), + Drug("Patient_B", "PIOGLITAZONE", 1,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM0MDBfMTk0OQ==", makeTS(2009, 1, 1)), + Drug("Patient_B", "BENFLUOREX", 1,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM1MDBfMTczMw==", makeTS(2007, 1, 1)) ).toDS val followUp: Dataset[Event[FollowUp]] = Seq[Event[FollowUp]]( @@ -62,44 +63,48 @@ class ExposurePeriodAdderSuite extends SharedContext{ ).toDS() val expected: Dataset[Event[Exposure]] = Seq[Event[Exposure]]( - Exposure("Patient_A", "NA", "PIOGLITAZONE", 1, makeTS(2008, 2, 1), Some(makeTS(2008, 11, 30))) + Exposure("Patient_A", "MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", "PIOGLITAZONE", 1, makeTS(2008, 2, 1), Some(makeTS(2008, 11, 30))) ).toDS() val exposureAdder = UnlimitedExposureAdder(3.months, 2, 6.months) val result = exposureAdder.toExposure(followUp)(input) + assertDSs(result, expected) } - it should "transform drugs to exposure based on the unlimited adder strategy with different parameters" in { - // Given - val sqlCtx = sqlContext - import sqlCtx.implicits._ + it should "transform drugs to 
exposure based on the unlimited adder strategy with different parameters" in { + // Given + val sqlCtx = sqlContext + import sqlCtx.implicits._ - //Given + //Given - val input: Dataset[Event[Drug]] = Seq[Event[Drug]]( - Drug("Patient_A", "PIOGLITAZONE", 1, makeTS(2008, 1, 1)), - Drug("Patient_A", "PIOGLITAZONE", 1, makeTS(2008, 2, 1)), - Drug("Patient_A", "PIOGLITAZONE", 1, makeTS(2008, 9, 1)), - Drug("Patient_A", "SULFONYLUREA", 1, makeTS(2009, 3, 1)), - Drug("Patient_A", "SULFONYLUREA", 1, makeTS(2009, 10, 1)), - Drug("Patient_B", "PIOGLITAZONE", 1, makeTS(2009, 1, 1)), - Drug("Patient_B", "BENFLUOREX", 1, makeTS(2007, 1, 1)) - ).toDS + val input: Dataset[Event[Drug]] = Seq[Event[Drug]]( + Drug("Patient_A", "PIOGLITAZONE", 1,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2008, 1, 1)), + Drug("Patient_A", "PIOGLITAZONE", 1,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2008, 2, 1)), + Drug("Patient_A", "PIOGLITAZONE", 1,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2008, 9, 1)), + Drug("Patient_A", "SULFONYLUREA", 1,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", makeTS(2009, 3, 1)), + Drug("Patient_A", "SULFONYLUREA", 1,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMyMDBfMTc1OQ==", makeTS(2009, 10, 1)), + Drug("Patient_B", "PIOGLITAZONE", 1,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM0MDBfMTk0OQ==", makeTS(2009, 1, 1)), + Drug("Patient_B", "BENFLUOREX", 1,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM1MDBfMTczMw==", makeTS(2007, 1, 1)) + ).toDS - val followUp: Dataset[Event[FollowUp]] = Seq[Event[FollowUp]]( - FollowUp("Patient_A", "any_reason", makeTS(2006, 6, 1), makeTS(2008, 11, 30)), - FollowUp("Patient_B", "any_reason", makeTS(2006, 7, 1), makeTS(2007, 7, 1)), - FollowUp("Patient_C", "any_reason", makeTS(2006, 8, 1), makeTS(2009, 12, 31)) - ).toDS() - val expected: Dataset[Event[Exposure]] = Seq[Event[Exposure]]( - Exposure("Patient_A", "NA", "PIOGLITAZONE", 1, 
makeTS(2008, 1, 1), Some(makeTS(2008, 11, 30))), - Exposure("Patient_B", "NA", "BENFLUOREX", 1, makeTS(2007, 1, 1), Some(makeTS(2007, 7, 1))) - ).toDS() - val exposureAdder = UnlimitedExposureAdder(0.months, 1, 0.months) + val followUp: Dataset[Event[FollowUp]] = Seq[Event[FollowUp]]( + FollowUp("Patient_A", "any_reason", makeTS(2006, 6, 1), makeTS(2008, 11, 30)), + FollowUp("Patient_B", "any_reason", makeTS(2006, 7, 1), makeTS(2007, 7, 1)), + FollowUp("Patient_C", "any_reason", makeTS(2006, 8, 1), makeTS(2009, 12, 31)) + ).toDS() + + val expected: Dataset[Event[Exposure]] = Seq[Event[Exposure]]( + Exposure("Patient_A", "MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", "PIOGLITAZONE", 1, makeTS(2008, 1, 1), Some(makeTS(2008, 11, 30))), + Exposure("Patient_B", "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM1MDBfMTczMw==", "BENFLUOREX", 1, makeTS(2007, 1, 1), Some(makeTS(2007, 7, 1))) + ).toDS() + val exposureAdder = UnlimitedExposureAdder(0.months, 1, 0.months) + + val result = exposureAdder.toExposure(followUp)(input) + + assertDSs(result, expected) + } - val result = exposureAdder.toExposure(followUp)(input) - assertDSs(result, expected) - } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureTransformerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureTransformerSuite.scala index 817bb3fa..c99a8d46 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureTransformerSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureTransformerSuite.scala @@ -9,6 +9,7 @@ import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event, Exposure, FollowUp} import fr.polytechnique.cmap.cnam.util.functions.makeTS class ExposureTransformerSuite extends SharedContext { + "toExposure" should "transform drugs to exposure based on parameters" in { // Given val sqlCtx = sqlContext @@ -16,10 +17,10 @@ class 
ExposureTransformerSuite extends SharedContext { //Given val input: Dataset[Event[Drug]] = Seq( - Drug("Patient_A", "Antidepresseurs", 2, makeTS(2014, 6, 8)), - Drug("Patient_A", "Antidepresseurs", 2, makeTS(2014, 7, 1)), - Drug("Patient_B", "Antidepresseurs", 2, makeTS(2014, 2, 5)), - Drug("Patient_B", "Antidepresseurs", 2, makeTS(2014, 9, 1)) + Drug("Patient_A", "Antidepresseurs", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 6, 8)), + Drug("Patient_A", "Antidepresseurs", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 7, 1)), + Drug("Patient_B", "Antidepresseurs", 2,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM0MDBfMTk0OQ==", makeTS(2014, 2, 5)), + Drug("Patient_B", "Antidepresseurs", 2,"MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzM0MDBfMTk0OQ==", makeTS(2014, 9, 1)) ).toDS val followUp: Dataset[Event[FollowUp]] = Seq( FollowUp("Patient_A", "any_reason", makeTS(2014, 6, 1), makeTS(2016, 12, 31)), @@ -44,6 +45,7 @@ class ExposureTransformerSuite extends SharedContext { ) val result = exposureTransformer.transform(followUp)(input) + assertDSs(expected, result) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerSuite.scala index b34ebfbb..f34c8632 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerSuite.scala @@ -2,14 +2,12 @@ package fr.polytechnique.cmap.cnam.etl.transformers.follow_up -import java.sql.Timestamp import scala.util.Try -import org.mockito.Mockito.mock import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.patients.Patient -import 
fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformerUtilities.{DeathReason, DiseaseReason, FollowUpEnd, ObservationEndReason, PatientDates, TrackLossDate, TrackLossReason, endReason, tracklossDateCorrected} +import fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformerUtilities.{DeathReason, FollowUpEnd, ObservationEndReason, PatientDates, TrackLossDate, TrackLossReason, endReason, tracklossDateCorrected} import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -160,25 +158,17 @@ class FollowUpTransformerSuite extends SharedContext { import sqlCtx.implicits._ // Given - val input: Dataset[(PatientDates, TrackLossDate, Event[Outcome])] = Seq( - (PatientDates("Patient_A", Some(makeTS(2015, 2, 1)), Some(makeTS(2006, 2, 1)), Some(makeTS(2009, 6, 30))), - TrackLossDate("Patient_A", Some(makeTS(2014, 5, 1))), Outcome( - "Patient_A", - "bladder_cancer", - makeTS(2007, 9, 1) - )) + val input: Dataset[(PatientDates, TrackLossDate)] = Seq( + (PatientDates("Patient_A", Some(makeTS(2015, 2, 1)), Some(makeTS(2006, 2, 1)), Some(makeTS(2014, 5, 1))), + TrackLossDate("Patient_A", Some(makeTS(2014, 5, 1)))) , (PatientDates("Patient_B", Some(makeTS(2016, 2, 1)), Some(makeTS(2006, 1, 1)), Some(makeTS(2012, 6, 30))), - TrackLossDate("Patient_B", Some(makeTS(2010, 2, 1))), Outcome( - "Patient_B", - "bladder_cancer", - makeTS(2011, 4, 1) - )), - (PatientDates("Patient_C", Some(makeTS(2017, 2, 1)), Some(makeTS(2006, 8, 1)), None), - TrackLossDate("Patient_C", None), mock(classOf[Event[Outcome]])), + TrackLossDate("Patient_B", Some(makeTS(2010, 2, 1)))), + (PatientDates("Patient_C", Some(makeTS(2017, 2, 1)), Some(makeTS(2006, 8, 1)), Some(makeTS(2017, 2, 1))), + TrackLossDate("Patient_C", None)), (PatientDates("Patient_D", Some(makeTS(2018, 2, 1)), Some(makeTS(2007, 10, 1)), Some(makeTS(2013, 6, 30))), - TrackLossDate("Patient_D", Some(makeTS(2017, 9, 1))), Outcome("Patient_D", "cancer", makeTS(2013, 6, 30))) + TrackLossDate("Patient_D", 
Some(makeTS(2017, 9, 1)))) ).toDS // When @@ -186,18 +176,16 @@ class FollowUpTransformerSuite extends SharedContext { .map { e => endReason( DeathReason(date = e._1.deathDate), - DiseaseReason(date = Try(Option(e._3.start)).getOrElse(None)), TrackLossReason(date = Try(e._2.trackloss).getOrElse(None)), ObservationEndReason(date = e._1.observationEnd) ) } - val expected: Dataset[FollowUpEnd] = Seq( - FollowUpEnd("Disease", Some(makeTS(2007, 9, 1))), + FollowUpEnd("Trackloss", Some(makeTS(2014, 5, 1))), FollowUpEnd("Trackloss", Some(makeTS(2010, 2, 1))), FollowUpEnd("Death", Some(makeTS(2017, 2, 1))), - FollowUpEnd("Disease", Some(makeTS(2013, 6, 30))) + FollowUpEnd("ObservationEnd", Some(makeTS(2013, 6, 30))) ).toDS @@ -307,7 +295,7 @@ class FollowUpTransformerSuite extends SharedContext { ).toDS val expected = Seq( - FollowUp("Regis", "Disease", makeTS(2006, 3, 1), makeTS(2007, 9, 1)), + FollowUp("Regis", "ObservationEnd", makeTS(2006, 3, 1), makeTS(2009, 1, 1)), FollowUp("pika", "Death", makeTS(2006, 3, 1), makeTS(2008, 10, 1)), FollowUp("patient03", "ObservationEnd", makeTS(2006, 3, 1), makeTS(2009, 1, 1)) ).toDS From b53a4d6fd1db6da19b29a7ab4cb31bad432e7268 Mon Sep 17 00:00:00 2001 From: Angel Francisco Orta Date: Tue, 21 Jan 2020 09:41:55 +0100 Subject: [PATCH 02/38] CNAM-432: Add comments and refactor unused methods CNAM-432: Corrected some documentation CNAM-432: Corrected failed test CNAM-432: Text document reformulation --- .../cmap/cnam/etl/events/FollowUp.scala | 31 +++--- .../cnam/etl/events/ObservationPeriod.scala | 26 ++--- .../etl/extractors/dcir/DcirExtractor.scala | 32 +++--- .../cnam/etl/extractors/dcir/DcirSource.scala | 1 + .../etl/extractors/drugs/DrugExtractor.scala | 4 +- .../follow_up/FollowUpTransformer.scala | 40 +++++++- .../FollowUpTransformerUtilities.scala | 97 +++++++++++++++---- .../transformers/observation/Columns.scala | 5 +- .../ObservationPeriodTransformer.scala | 18 +++- .../cmap/cnam/etl/events/FollowUpSuite.scala | 19 ---- 10 
files changed, 179 insertions(+), 94 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala index f30ac57b..035d355f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala @@ -5,31 +5,22 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp import org.apache.spark.sql.Row - +/** Factory for FollowUp instances. */ object FollowUp extends FollowUp - +/** This trait stores the methods required to create an Event object of type FollowUp. */ trait FollowUp extends AnyEvent with EventBuilder { val category: EventCategory[FollowUp] = "follow_up" - def fromRow( - r: Row, - patientIDCol: String = "patientID", - endReason: String = "endReason", - startCol: String = "start", - endCol: String = "end"): Event[FollowUp] = { - - FollowUp( - r.getAs[String](patientIDCol), - r.getAs[String](endReason), - r.getAs[Timestamp](startCol), - r.getAs[Timestamp](endCol) - ) - } - - + /** Creates un Event object of type FollowUp using a map function to map a dataset. + * + * @param patientID The value patientID from dataset. + * @param endReason The value endReason from dataset. + * @param start The value start from dataset. + * @param end The value end from dataset. + * @return Event[FollowUp]. 
+ */ def apply(patientID: String, endReason: String, start: Timestamp, end: Timestamp): Event[FollowUp] = Event(patientID, category, groupID = "NA", endReason, weight = 0D, start, Some(end)) -} - +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ObservationPeriod.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ObservationPeriod.scala index a3f8c16a..b81b6475 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ObservationPeriod.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ObservationPeriod.scala @@ -1,28 +1,22 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row +/** Factory for ObservationPeriod instances. */ object ObservationPeriod extends ObservationPeriod +/** This trait stores the methods required to create an Event object of type ObservationPeriod. */ trait ObservationPeriod extends AnyEvent with EventBuilder { val category: EventCategory[ObservationPeriod] = "observation_period" - def fromRow( - r: Row, - patientIDCol: String = "patientID", - startCol: String = "start", - endCol: String = "end"): Event[ObservationPeriod] = { - - ObservationPeriod( - r.getAs[String](patientIDCol), - r.getAs[Timestamp](startCol), - r.getAs[Timestamp](endCol) - ) - } - - + /** Creates un Event object of type ObservationPeriod using a map function to map a dataset. + * + * @param patientID The value patientID from dataset. + * @param start The value start from dataset. + * @param end The value end from dataset. + * @return Event[ObservationPeriod]. 
+ */ def apply(patientID: String, start: Timestamp, end: Timestamp): Event[ObservationPeriod] = Event(patientID, category, groupID = "NA", value = "NA", weight = 0D, start, Some(end)) -} +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala index 8ccf21d4..a70699df 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala @@ -44,19 +44,27 @@ trait DcirExtractor[EventType <: AnyEvent] extends Extractor[EventType] with Dci def extractFluxDate(r: Row): Timestamp = r.getAs[java.util.Date](ColNames.DcirFluxDate).toTimestamp + /** Method to generate a hash value in a string format for the groupID value from a row with these values + * FLX_DIS_DTD,FLX_TRT_DTD,FLX_EMT_TYP,FLX_EMT_NUM,FLX_EMT_ORD,ORG_CLE_NUM,DCT_ORD_NUM. + * They are the 7 columns that identifies prescriptions in a unique way. + * + * @param r The Row object itself. + * @return A hash Id unique in a string format. 
+ */ override def extractGroupId(r: Row): String = { - Base64.encodeBase64(s"${r.getAs[String](ColNames.DateStart)}_${r.getAs[String](ColNames.DateEntry)}_${ - r.getAs[String]( - ColNames - .EmitterType - ) - }_${r.getAs[String](ColNames.EmitterId)}_${r.getAs[String](ColNames.FlowNumber)}_${ - r.getAs[String]( - ColNames - .OrgId - ) - }_${r.getAs[String](ColNames.OrderId)}".getBytes()).map(_.toChar).mkString - + Base64.encodeBase64( + s"${r.getAs[String](ColNames.DateStart)}_${r.getAs[String](ColNames.DateEntry)}_${ + r.getAs[String]( + ColNames + .EmitterType + ) + }_${r.getAs[String](ColNames.EmitterId)}_${r.getAs[String](ColNames.FlowNumber)}_${ + r.getAs[String]( + ColNames + .OrgId + ) + }_${r.getAs[String](ColNames.OrderId)}".getBytes() + ).map(_.toChar).mkString } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala index b48edfb9..2a8aac6b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala @@ -2,6 +2,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.dcir import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames +/** Trait to retrieve the columns of dcir dataframe. 
*/ trait DcirSource extends ColumnNames { final object ColNames extends Serializable { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala index b4362537..a79e5b03 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala @@ -39,8 +39,8 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { * It allows to identify each prescription in a unique way, it can be used to identify * the possible interactions of molecules prescript in the same period. * - * @param r The Row object itself - * @return A hash Id unique in a string format + * @param r The Row object itself. + * @return A hash Id unique in a string format. */ def extractGroupId(r: Row): String = { Base64.encodeBase64( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala index bf217289..9a64398e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala @@ -9,10 +9,25 @@ import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.patients.Patient - +/** It allows create a followUp dataset using the dataset of + * Dataset[(Patient, Event[ObservationPeriod])], + * Dataset[Event[Molecule]] ( This dataset is not used in the treatment and it should be removed in further versions), + * Dataset[Event[Outcome]]( This dataset is not used in the treatment and it should be removed in further versions) and + * Dataset[Event[Trackloss]]. 
+ * + * @param config A config object tha contains the need values to set the parameters of study. + */ class FollowUpTransformer(config: FollowUpTransformerConfig) { - + /** The main method in this transformation class, It combines multiple basic Events to form a FollowUp Dataset. + * + * @param patients A dataset that contains a dataset of [[fr.polytechnique.cmap.cnam.etl.patients.Patient]] joined with + * a dataset of [[fr.polytechnique.cmap.cnam.etl.events.ObservationPeriod]]. + * @param dispensations A dataset of [[fr.polytechnique.cmap.cnam.etl.events.Molecule]]. + * @param outcomes A dataset of [[fr.polytechnique.cmap.cnam.etl.events.Outcome]]. + * @param tracklosses A dataset of [[fr.polytechnique.cmap.cnam.etl.events.Trackloss]]. + * @return A dataset of Event[FollowUp] type ([[fr.polytechnique.cmap.cnam.etl.events.FollowUp]]). + */ def transform( patients: Dataset[(Patient, Event[ObservationPeriod])], dispensations: Dataset[Event[Molecule]], @@ -26,6 +41,12 @@ class FollowUpTransformer(config: FollowUpTransformerConfig) { val delayMonths = config.delayMonths + /** It takes the Dataset[(Patient, Event[ObservationPeriod])] and perform several transformations: + * 1. Extract th patientId value. + * 2. Correct the observationPeriod start date plus delayMonth value comparing to observationPeriod end date. + * 3. Calculate the min of dates. + * 4 Return a PatientDates dataset [[fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformerUtilities.PatientDates]]. + */ val patientDates: Dataset[PatientDates] = patients .map { e => PatientDates( @@ -51,6 +72,12 @@ class FollowUpTransformer(config: FollowUpTransformerConfig) { ) ) + /** It takes patientDates dataset and join with tracklosses dataset performing the algorithm as follow: + * 1. Extract the patientId value and correct the trackloss date comparing with followUpStart date. + * 2. Filter corrected empty dates. + * 3. Extract the min of tracklossDate. + * 4. 
Return a TrackLossDate [[fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformerUtilities.TrackLossDate]]. + */ val tracklossDates: Dataset[TrackLossDate] = patientDates .joinWith(tracklosses, tracklosses.col(PatientID) === patientDates.col(PatientID)) .map(e => TrackLossDate(e._2.patientID, tracklossDateCorrected(e._2.start, e._1.followUpStart.get))) @@ -61,6 +88,14 @@ class FollowUpTransformer(config: FollowUpTransformerConfig) { ) .map(e => TrackLossDate(e.getAs[String](PatientID), Option(e.getAs[Timestamp](TracklossDate)))) + /** It joins patientDates dataset with tracklossDates dataset carrying out the following algorithm : + * 1. Retrieve the trackloss date if exist, None otherwise. + * 2. Using the death date, the trackloss date and the observation end date it calculates through + * [[fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformerUtilities.endReason]] the follow up's + * end date and reason. + * 3. It filters the empty followUp end dates. + * 4. Return a FollowUp [[fr.polytechnique.cmap.cnam.etl.events.FollowUp]]. 
+ */ patientDates .joinWith(tracklossDates, tracklossDates.col(PatientID) === patientDates.col(PatientID), "left_outer") .map { e => @@ -73,6 +108,5 @@ class FollowUpTransformer(config: FollowUpTransformerConfig) { ) FollowUp(e._1.patientID, followUpEndReason.reason, e._1.followUpStart.get, followUpEndReason.date.get) }.filter(e => e.end.nonEmpty) - } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerUtilities.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerUtilities.scala index 9425f685..63fd76d5 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerUtilities.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformerUtilities.scala @@ -5,49 +5,81 @@ package fr.polytechnique.cmap.cnam.etl.transformers.follow_up import java.sql.Timestamp import fr.polytechnique.cmap.cnam.util.datetime.implicits.addMonthsToRichTimestamp +/** Factory for FollowUp utilities. */ object FollowUpTransformerUtilities { - case class PatientDates( + /** It saves patients with need dates. + * + * @param patientID The value patientID from dataset. + * @param deathDate The value deathDate from dataset. + * @param followUpStart The value followUpStart from dataset. + * @param observationEnd The value observationEnd from dataset. + */ + private[follow_up] case class PatientDates( patientID: String, deathDate: Option[Timestamp], followUpStart: Option[Timestamp], observationEnd: Option[Timestamp]) - case class TrackLossDate( + /** It saves patients with their trackloss date. + * + * @param patientID The value patientID from dataset. + * @param trackloss The value trackloss from dataset. + */ + private[follow_up] case class TrackLossDate( patientID: String, trackloss: Option[Timestamp]) - case class FollowUpEnd(reason: String, date: Option[Timestamp]) + /** It saves the follow up's end's reason and its date. 
+ * + * @param reason A string value of the reason of the end. + * @param date The date of the end of follow up. + */ + private[follow_up] case class FollowUpEnd(reason: String, date: Option[Timestamp]) - - sealed trait EndReason extends Enumeration { + /** It stores the list of reasons. */ + private[follow_up] sealed trait EndReason extends Enumeration { val Death, Trackloss, ObservationEnd = Value - val endReason: String } - case object Death extends EndReason { + /** It's an object to store Death as endReason. */ + private[follow_up] case object Death extends EndReason { val endReason = Death .toString - } - case object Trackloss extends EndReason { + /** It's an object to store Trackloss as endReason. */ + private[follow_up] case object Trackloss extends EndReason { val endReason = Trackloss .toString - } - case object ObservationEnd extends EndReason { + /** It's an object to store ObservationEnd as endReason. */ + private[follow_up] case object ObservationEnd extends EndReason { val endReason = ObservationEnd .toString - } - - abstract sealed class FollowUpEndReason(val endReason: EndReason) { + /** It's need to store the compare method + * which allow use the min function with + * the FollowUpEndReason class type. + * + * @param endReason An object of type EndReason. + */ + private[follow_up] abstract sealed class FollowUpEndReason(val endReason: EndReason) { val date: Option[Timestamp] + /** It takes a FollowUpEndReason type class and compare the dates, + * if they are the same, then compare the end reasons to return + * the correct according to priority. + * If two reasons are the same date, + * Death is the first option, if Death is not present, the second one is Trackloss + * and the third option is ObservationEnd. + * + * @param that The class of FollowUpEndReason type. + * @return The correct FollowUpEndReason class. 
+ */ def compare(that: FollowUpEndReason): Int = { (this.date.get compareTo that.date.get) match { @@ -62,37 +94,64 @@ object FollowUpTransformerUtilities { } } + /** It stores the implicit Ordering needed to use the min function in FollowUpEndReason types class. */ object FollowUpEndReason { - implicit def ord[A <: FollowUpEndReason]: Ordering[A] = Ordering.by((_: A).date.get) + import fr.polytechnique.cmap.cnam.util.datetime.implicits.ordered - implicit def ordered: Ordering[Timestamp] = new Ordering[Timestamp] { - def compare(x: Timestamp, y: Timestamp): Int = x compareTo y - } + /** Implicit ordering for the timestamps in FollowUpEndReason type case class. + * + * The filter to avoid empty dates its mandatory. + * Example: Seq(death, disease, trackloss, observation).filter(e => e.date.nonEmpty).min + * + */ + implicit def ord[A <: FollowUpEndReason]: Ordering[A] = Ordering.by((_: A).date.get) } + /** It stores death reason and its date. + * + * @param date The value deathDate from dataset. + */ case class DeathReason( date: Option[Timestamp]) extends FollowUpEndReason(Death) with Ordered[FollowUpEndReason] + /** It stores trackloss reason and its date. + * + * @param date The value trackloss from dataset TrackLossDate. + */ case class TrackLossReason( date: Option[Timestamp]) extends FollowUpEndReason(Trackloss) with Ordered[FollowUpEndReason] + /** It stores observation end reason and its date. + * + * @param date The value observationEnd from dataset PatientDates. + */ case class ObservationEndReason( date: Option[Timestamp]) extends FollowUpEndReason(ObservationEnd) with Ordered[FollowUpEndReason] + /** It returns the date later add delayMonths value from + * config passes through FollowUpTransformer class. 
+ */ val correctedStart: (Timestamp, Option[Timestamp], Int) => Option[Timestamp] = (start: Timestamp, end: Option[Timestamp], delayMonths: Int) => { val st: Timestamp = addMonthsToRichTimestamp(delayMonths, start) if (st.before(end.get)) Some(st) else None - } + /** It returns start date when after follow Up Start otherwise None. */ val tracklossDateCorrected: (Timestamp, Timestamp) => Option[Timestamp] = (start: Timestamp, followUpStart: Timestamp) => { if (start.after(followUpStart)) Some(start) else None } + /** It takes all FollowUpEndReason type class and return the min of them. + * + * @param death A DeathReason class. + * @param trackloss A TrackLossReason class. + * @param observation A ObservationEndReason class. + * @return FollowUpEnd class. + */ def endReason( death: DeathReason, trackloss: TrackLossReason, diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/Columns.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/Columns.scala index 2eb75959..849c97fb 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/Columns.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/Columns.scala @@ -2,7 +2,10 @@ package fr.polytechnique.cmap.cnam.etl.transformers.observation import fr.polytechnique.cmap.cnam.etl.events.Event -private[observation] object Columns { +/** Private object for the package [[fr.polytechnique.cmap.cnam.etl.transformers.observation]] + * to retrieve the columns of event object of type ObservationPeriod. 
+ * */ +private[observation] object Columns { final val PatientID = Event.Columns.PatientID final val Start = Event.Columns.Start diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/ObservationPeriodTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/ObservationPeriodTransformer.scala index a25544b5..7679bfed 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/ObservationPeriodTransformer.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/ObservationPeriodTransformer.scala @@ -8,10 +8,21 @@ import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, Molecule, ObservationPeriod} import fr.polytechnique.cmap.cnam.util.datetime.implicits._ + +/** It allows create a observationPeriod dataset using a dataset of type + * [[fr.polytechnique.cmap.cnam.etl.events.AnyEvent]]. + * + * @param config A config file that contains the need values to create the ObservationPeriod transformer. + */ class ObservationPeriodTransformer(config: ObservationPeriodTransformerConfig) { import Columns._ + /** The main method in this transformation class, it allows the transformation into an ObservationPeriod dataset. + * + * @param events A dataset of [[fr.polytechnique.cmap.cnam.etl.events.AnyEvent]]. + * @return A dataset of Event[ObservationPeriod] type ([[fr.polytechnique.cmap.cnam.etl.events.ObservationPeriod]]). + */ def transform(events: Dataset[Event[AnyEvent]]): Dataset[Event[ObservationPeriod]] = { val studyStart: Timestamp = config.studyStart @@ -20,6 +31,11 @@ class ObservationPeriodTransformer(config: ObservationPeriodTransformerConfig) { import events.sqlContext.implicits._ + /** It takes the events dataset and apply the following algorithm: + * 1. Filter by category equal to ''molecule'' and start date before of study start date. + * 2. Calculate the min of start date. + * 3. 
Return an ObservationPeriod [[fr.polytechnique.cmap.cnam.etl.events.ObservationPeriod]]. + */ events.filter( e => e.category == Molecule.category && (e.start .compareTo(studyStart) >= 0) @@ -32,8 +48,6 @@ class ObservationPeriodTransformer(config: ObservationPeriodTransformerConfig) { e.getAs[Timestamp](Start), studyEnd ) - ) - } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUpSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUpSuite.scala index a375f656..4538461d 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUpSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUpSuite.scala @@ -23,23 +23,4 @@ class FollowUpSuite extends AnyFlatSpec { // Then assert(result == expected) } - - "fromRow" should "allow creation of a FollowUp event from a row object" in { - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("endR", StringType) :: - StructField("start", TimestampType) :: - StructField("end", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "any_reason", makeTS(2010, 1, 1), makeTS(2010, 2, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = FollowUp("Patient01", "any_reason", makeTS(2010, 1, 1), makeTS(2010, 2, 1)) - - // When - val result = FollowUp.fromRow(r, "pID", "endR", "start", "end") - - // Then - assert(result == expected) - } } From 97c85aec6cb1e53cc39e6f3657c365dd481a1e35 Mon Sep 17 00:00:00 2001 From: Angel Francisco Orta Date: Thu, 23 Jan 2020 11:38:03 +0100 Subject: [PATCH 03/38] CNAM-436: Remove fromRow methods --- .../cmap/cnam/etl/events/Classification.scala | 15 ------- .../cmap/cnam/etl/events/Diagnosis.scala | 34 ---------------- .../cmap/cnam/etl/events/Exposure.scala | 17 -------- .../cmap/cnam/etl/events/HospitalStay.scala | 13 ------ .../cmap/cnam/etl/events/MedicalAct.scala | 16 -------- .../etl/events/MedicalTakeOverReason.scala | 16 -------- 
.../cmap/cnam/etl/events/Molecule.scala | 15 ------- .../cmap/cnam/etl/events/Outcome.scala | 28 ------------- .../events/PractionnerClaimSpeciality.scala | 13 ------ .../cmap/cnam/etl/events/Trackloss.scala | 8 ---- .../extractors/tracklosses/Tracklosses.scala | 3 +- .../cnam/etl/events/ClassificationSuite.scala | 24 ----------- .../cmap/cnam/etl/events/DiagnosisSuite.scala | 40 ------------------- .../cmap/cnam/etl/events/ExposureSuite.scala | 20 ---------- .../cnam/etl/events/HospitalStaySuite.scala | 23 ----------- .../cnam/etl/events/MedicalActSuite.scala | 21 ---------- .../events/MedicalTakeOverReasonSuite.scala | 21 ---------- .../cmap/cnam/etl/events/MoleculeSuite.scala | 19 --------- .../PractionnerClaimSpecialitySuite.scala | 20 ---------- .../transformers/outcomes/OutcomeSuite.scala | 37 ----------------- 20 files changed, 2 insertions(+), 401 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Classification.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Classification.scala index 9f2b0061..947290a9 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Classification.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Classification.scala @@ -10,21 +10,6 @@ trait Classification extends AnyEvent with EventBuilder { val category: EventCategory[Classification] - def fromRow( - r: Row, - patientIDCol: String = "patientID", - nameCol: String = "name", - groupIDCol: String = "groupID", - dateCol: String = "eventDate") - : Event[Classification] = { - apply( - r.getAs[String](patientIDCol), - r.getAs[String](groupIDCol), - r.getAs[String](nameCol), - r.getAs[Timestamp](dateCol) - ) - } - def apply( patientID: String, groupID: String, diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala index ba063754..7707eee5 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala @@ -9,48 +9,14 @@ trait Diagnosis extends AnyEvent with EventBuilder { val category: EventCategory[Diagnosis] - def fromRow(r: Row, patientIDCol: String, codeCol: String, dateCol: String): Event[Diagnosis] = { - apply(r.getAs[String](patientIDCol), r.getAs[String](codeCol), r.getAs[Timestamp](dateCol)) - } - def apply(patientID: String, code: String, date: Timestamp): Event[Diagnosis] = { Event(patientID, category, groupID = "NA", code, 0.0, date, None) } - def fromRow( - r: Row, - patientIDCol: String = "patientID", - groupIDCol: String = "groupID", - codeCol: String = "code", - dateCol: String = "eventDate"): Event[Diagnosis] = { - apply( - r.getAs[String](patientIDCol), - r.getAs[String](groupIDCol), - r.getAs[String](codeCol), - r.getAs[Timestamp](dateCol) - ) - } - def apply(patientID: String, groupID: String, code: String, date: Timestamp): Event[Diagnosis] = { Event(patientID, category, groupID, code, 0.0, date, None) } - def fromRow( - r: Row, - patientIDCol: String, - groupIDCol: String, - codeCol: String, - weightCol: String, - dateCol: String): Event[Diagnosis] = { - apply( - r.getAs[String](patientIDCol), - r.getAs[String](groupIDCol), - r.getAs[String](codeCol), - r.getAs[Double](weightCol), - r.getAs[Timestamp](dateCol) - ) - } - def apply(patientID: String, groupID: String, code: String, weight: Double, date: Timestamp): Event[Diagnosis] = { Event(patientID, category, groupID, code, weight, date, None) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Exposure.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Exposure.scala index 5aaa4e45..e0b3a757 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Exposure.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Exposure.scala @@ -11,23 +11,6 @@ trait Exposure extends AnyEvent with EventBuilder { val category: EventCategory[Exposure] = "exposure" - def fromRow( - r: Row, - 
patientIDCol: String = "patientID", - nameCol: String = "name", - weightCol: String = "weight", - startCol: String = "start", - endCol: String = "end"): Event[Exposure] = { - - Exposure( - r.getAs[String](patientIDCol), - r.getAs[String](nameCol), - r.getAs[Double](weightCol), - r.getAs[Timestamp](startCol), - r.getAs[Timestamp](endCol) - ) - } - def apply( patientID: String, molecule: String, weight: Double, start: Timestamp, end: Timestamp ): Event[Exposure] = Event(patientID, category, groupID = "NA", molecule, weight, start, Some(end)) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala index 0af3ef0c..6c23c1f6 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala @@ -10,19 +10,6 @@ trait HospitalStay extends AnyEvent with EventBuilder { override val category: EventCategory[HospitalStay] = "hospital_stay" - def fromRow( - r: Row, - patientIDCol: String = PatientID, - hospitalIDCol: String = Value, - startCol: String = Start, - endCol: String = End): Event[HospitalStay] = - apply( - r.getAs[String](patientIDCol), - r.getAs[String](hospitalIDCol), - r.getAs[Timestamp](startCol), - r.getAs[Timestamp](endCol) - ) - def apply(patientID: String, hospitalID: String, start: Timestamp, end: Timestamp): Event[HospitalStay] = apply(patientID, hospitalID, hospitalID, 0D, start, Some(end)) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala index 082c9dbc..03d3f736 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala @@ -9,22 +9,6 @@ trait MedicalAct extends AnyEvent with EventBuilder { override val category: EventCategory[MedicalAct] - def fromRow( - r: 
Row, - patientIDCol: String = "patientID", - groupIDCol: String = "groupID", - codeCol: String = "code", - weightCol: String = "weight", - dateCol: String = "eventDate"): Event[MedicalAct] = { - this.apply( - r.getAs[String](patientIDCol), - r.getAs[String](groupIDCol), - r.getAs[String](codeCol), - r.getAs[Double](weightCol), - r.getAs[Timestamp](dateCol) - ) - } - def apply(patientID: String, groupID: String, code: String, weight: Double, date: Timestamp): Event[MedicalAct] = { Event(patientID, category, groupID, code, weight, date, None) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReason.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReason.scala index c67ad0dd..6a97e447 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReason.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReason.scala @@ -7,22 +7,6 @@ trait MedicalTakeOverReason extends AnyEvent with EventBuilder { override val category: EventCategory[MedicalTakeOverReason] - def fromRow( - r: Row, - patientIDCol: String = "patientID", - groupIDCol: String = "groupID", - codeCol: String = "code", - weightCol: String = "weight", - dateCol: String = "eventDate"): Event[MedicalTakeOverReason] = { - this.apply( - r.getAs[String](patientIDCol), - r.getAs[String](groupIDCol), - r.getAs[String](codeCol), - r.getAs[Double](weightCol), - r.getAs[Timestamp](dateCol) - ) - } - def apply(patientID: String, groupID: String, code: String, weight: Double, date: Timestamp): Event[MedicalTakeOverReason] = { Event(patientID, category, groupID, code, weight, date, None) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Molecule.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Molecule.scala index 2de0e379..33e98963 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Molecule.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Molecule.scala @@ 
-11,21 +11,6 @@ trait Molecule extends Dispensation with EventBuilder { override val category: EventCategory[Molecule] = "molecule" - def fromRow( - r: Row, - patientIDCol: String = "patientID", - nameCol: String = "name", - dosageCol: String = "dosage", - dateCol: String = "eventDate"): Event[Molecule] = { - - Molecule( - r.getAs[String](patientIDCol), - r.getAs[String](nameCol), - r.getAs[Double](dosageCol), - r.getAs[Timestamp](dateCol) - ) - } - def apply(patientID: String, name: String, dosage: Double, date: Timestamp): Event[Molecule] = Event(patientID, category, groupID = "NA", name, dosage, date, None) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Outcome.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Outcome.scala index 3936817a..1478e149 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Outcome.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Outcome.scala @@ -17,37 +17,9 @@ trait Outcome extends AnyEvent with EventBuilder { def apply(patientID: String, groupId: String, name: String, weight: Double, date: Timestamp): Event[Outcome] = Event(patientID, category, groupID = groupId, name, weight, date, None) - def fromRow( - r: Row, - patientIDCol: String = "patientID", - nameCol: String = "name", - dateCol: String = "eventDate"): Event[Outcome] = { - - Outcome( - r.getAs[String](patientIDCol), - r.getAs[String](nameCol), - r.getAs[Timestamp](dateCol) - ) - } - def apply(patientID: String, name: String, date: Timestamp): Event[Outcome] = Event(patientID, category, groupID = "NA", name, 0.0, date, None) - def fromRow( - r: Row, - patientIDCol: String, - nameCol: String, - weightCol: String, - dateCol: String): Event[Outcome] = { - - Outcome( - r.getAs[String](patientIDCol), - r.getAs[String](nameCol), - r.getAs[Double](weightCol), - r.getAs[Timestamp](dateCol) - ) - } - def apply(patientID: String, name: String, weight: Double, date: Timestamp): Event[Outcome] = Event(patientID, category, 
groupID = "NA", name, weight, date, None) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala index 44ed946d..ea78b704 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala @@ -9,19 +9,6 @@ trait PractitionerClaimSpeciality extends AnyEvent with EventBuilder { val category: EventCategory[PractitionerClaimSpeciality] - def fromRow( - r: Row, - patientIDCol: String = "patientID", - pfsIDCol: String = "groupID", - pfsSpeCol: String = "code", - dateCol: String = "eventDate"): Event[PractitionerClaimSpeciality] = - apply( - r.getAs[String](patientIDCol), - r.getAs[String](pfsIDCol), - r.getAs[String](pfsSpeCol), - r.getAs[Timestamp](dateCol) - ) - def apply(patientID: String, groupID: String, pfsSpe: String, date: Timestamp): Event[PractitionerClaimSpeciality] = { Event(patientID, category, groupID, pfsSpe, 0.0, date, None) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala index 8690e596..f324a83e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala @@ -11,14 +11,6 @@ trait Trackloss extends AnyEvent with EventBuilder { val category: EventCategory[Trackloss] = "trackloss" - def fromRow( - r: Row, - patientIDCol: String = "patientID", - dateCol: String = "eventDate"): Event[Trackloss] = { - - Trackloss(r.getAs[String](patientIDCol), r.getAs[Timestamp](dateCol)) - } - def apply(patientID: String, timestamp: Timestamp): Event[Trackloss] = { Event(patientID, category, groupID = "NA", "trackloss", 0.0, timestamp, None) } diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/Tracklosses.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/Tracklosses.scala index 615b9fc2..5fb7e0e4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/Tracklosses.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/Tracklosses.scala @@ -26,7 +26,8 @@ class Tracklosses(config: TracklossesConfig) { .withInterval(config.studyEnd) .filterTrackLosses(config.emptyMonths) .withTrackLossDate(config.tracklossMonthDelay) - .map(Trackloss.fromRow(_, dateCol = "tracklossDate")) + .map(e => Trackloss(e.getAs[String]("patientID"), + e.getAs[Timestamp]("tracklossDate"))) } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ClassificationSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ClassificationSuite.scala index e716c9db..0bc1b675 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ClassificationSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ClassificationSuite.scala @@ -24,28 +24,4 @@ class ClassificationSuite extends AnyFlatSpecLike { // Then assert(result == expected) } - - "fromRow" should "convert the row accordingly" in { - // Given - val schema = StructType( - StructField("patientID", StringType) :: - StructField("groupID", StringType) :: - StructField("name", StringType) :: - StructField("eventDate", TimestampType) :: Nil - ) - - val values = Array[Any]("Stevie", "42", "GHMDA233", makeTS(2016, 1, 1)) - - val row = new GenericRowWithSchema(values, schema) - - val expected = Event("Stevie", "ghm", "42", "GHMDA233", 0.0, makeTS(2016, 1, 1), None) - - // When - val result = GHMClassification.fromRow(row) - - // Then - assert(result == expected) - } - - } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/DiagnosisSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/DiagnosisSuite.scala index 166c6ebf..1f0fd50d 
100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/DiagnosisSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/DiagnosisSuite.scala @@ -29,44 +29,4 @@ class DiagnosisSuite extends AnyFlatSpec { // Then assert(result == expected) } - - "fromRow" should "allow creation of a DiagnosisBuilder event from a row object" in { - - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("gId", StringType) :: - StructField("cod", StringType) :: - StructField("wei", StringType) :: - StructField("dat", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "1_1_2010", "C67", 1.0, makeTS(2010, 1, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = MockDiagnosis("Patient01", "1_1_2010", "C67", 1.0, makeTS(2010, 1, 1)) - - // When - val result = MockDiagnosis.fromRow(r, "pID", "gId", "cod", "wei", "dat") - - // Then - assert(result == expected) - } - - it should "support creation without groupId" in { - - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("cod", StringType) :: - StructField("dat", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "C67", makeTS(2010, 1, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = MockDiagnosis("Patient01", "C67", makeTS(2010, 1, 1)) - - // When - val result = MockDiagnosis.fromRow(r, "pID", "cod", "dat") - - // Then - assert(result == expected) - } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ExposureSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ExposureSuite.scala index ddaec829..ba14ef27 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ExposureSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ExposureSuite.scala @@ -23,24 +23,4 @@ class ExposureSuite extends AnyFlatSpec { // Then assert(result == expected) } - - "fromRow" should "allow creation of a Exposure event from a row object" in { - // 
Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("mol", StringType) :: - StructField("weight", DoubleType) :: - StructField("start", TimestampType) :: - StructField("end", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "pioglitazone", 100.0, makeTS(2010, 1, 1), makeTS(2010, 2, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = Exposure("Patient01", "pioglitazone", 100.0, makeTS(2010, 1, 1), makeTS(2010, 2, 1)) - - // When - val result = Exposure.fromRow(r, "pID", "mol", "weight", "start", "end") - - // Then - assert(result == expected) - } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStaySuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStaySuite.scala index 036747e5..05976c5b 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStaySuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStaySuite.scala @@ -23,27 +23,4 @@ class HospitalStaySuite extends SharedContext { assert(expected == result) } - - "fromRow" should "create hospital stay event correctly from dataframe row" in { - // Given - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - val df = Seq( - ("patientID", "hospitalID", makeTS(2018, 1, 1), makeTS(2018, 3, 1)) - ).toDF("patientID", "value", "start", "end") - - val expected = HospitalStay( - "patientID", "hospitalID", - makeTS(2018, 1, 1), makeTS(2018, 3, 1) - ) - - //When - val result = HospitalStay.fromRow(df.first) - - //Then - assert(expected == result) - - } - } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalActSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalActSuite.scala index 849023fe..1d734387 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalActSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalActSuite.scala @@ -29,25 +29,4 @@ class MedicalActSuite extends 
AnyFlatSpec { // Then assert(result == expected) } - - "fromRow" should "allow creation of a DiagnosisBuilder event from a row object" in { - - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("gId", StringType) :: - StructField("cod", StringType) :: - StructField("wei", StringType) :: - StructField("dat", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "1_1_2010", "C67", 0.0, makeTS(2010, 1, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = MockMedicalAct("Patient01", "1_1_2010", "C67", 0.0, makeTS(2010, 1, 1)) - - // When - val result = MockMedicalAct.fromRow(r, "pID", "gId", "cod", "wei", "dat") - - // Then - assert(result == expected) - } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReasonSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReasonSuite.scala index 3705a58a..e15b5a7a 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReasonSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReasonSuite.scala @@ -28,25 +28,4 @@ class MedicalTakeOverReasonSuite extends AnyFlatSpec { // Then assert(result == expected) } - - "fromRow" should "allow creation of a MedicalTakeOverReason Builder event from a row object" in { - - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("gId", StringType) :: - StructField("cod", StringType) :: - StructField("wei", StringType) :: - StructField("dat", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "1_1_2010", "11", 0.0, makeTS(2010, 1, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = MockMedicalTakeOverReason("Patient01", "1_1_2010", "11", 0.0, makeTS(2010, 1, 1)) - - // When - val result = MockMedicalTakeOverReason.fromRow(r, "pID", "gId", "cod", "wei", "dat") - - // Then - assert(result == expected) - } } diff --git 
a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MoleculeSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MoleculeSuite.scala index 2c5ba020..be5b18bd 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MoleculeSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MoleculeSuite.scala @@ -24,23 +24,4 @@ class MoleculeSuite extends AnyFlatSpec { // Then assert(result == expected) } - - "fromRow" should "allow creation of a Molecule event from a row object" in { - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("mol", StringType) :: - StructField("weight", DoubleType) :: - StructField("date", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "pioglitazone", 100.0, makeTS(2010, 1, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = Molecule("Patient01", "pioglitazone", 100.0, makeTS(2010, 1, 1)) - - // When - val result = Molecule.fromRow(r, "pID", "mol", "weight", "date") - - // Then - assert(result == expected) - } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpecialitySuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpecialitySuite.scala index bf2ccf5b..17c029b0 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpecialitySuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpecialitySuite.scala @@ -37,24 +37,4 @@ class PractitionerClaimSpecialitySuite extends AnyFlatSpec { // Then assert(result == expected) } - - "fromRow" should "allow creation of a PrestationSpecialityBuilder event from a row object" in { - - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("gId", StringType) :: - StructField("cod", StringType) :: - StructField("dat", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "A10000001", "42", makeTS(2010, 1, 1)) - val r = new 
GenericRowWithSchema(values, schema) - val expected = MockPractionnerClaimSpeciality$("Patient01", "A10000001", "42", makeTS(2010, 1, 1)) - - // When - val result = MockPractionnerClaimSpeciality$.fromRow(r, "pID", "gId", "cod", "dat") - - // Then - assert(result == expected) - } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/outcomes/OutcomeSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/outcomes/OutcomeSuite.scala index 376885b9..234e91a5 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/outcomes/OutcomeSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/outcomes/OutcomeSuite.scala @@ -36,41 +36,4 @@ class OutcomeSuite extends AnyFlatSpec { // Then assert(result == expected) } - - "fromRow" should "allow creation of a Outcome event from a row object" in { - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("name", StringType) :: - StructField("date", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "bladder_cancer", makeTS(2010, 1, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = Outcome("Patient01", "bladder_cancer", makeTS(2010, 1, 1)) - - // When - val result = Outcome.fromRow(r, "pID", "name", "date") - - // Then - assert(result == expected) - } - - "fromRow" should "have severity" in { - // Given - val schema = StructType( - StructField("pID", StringType) :: - StructField("name", StringType) :: - StructField("weight", DoubleType) :: - StructField("date", TimestampType) :: Nil - ) - val values = Array[Any]("Patient01", "bladder_cancer", 4.0, makeTS(2010, 1, 1)) - val r = new GenericRowWithSchema(values, schema) - val expected = Outcome("Patient01", "bladder_cancer", 4.0, makeTS(2010, 1, 1)) - - // When - val result = Outcome.fromRow(r, "pID", "name", "weight", "date") - - // Then - assert(result == expected) - } } From 2c2080ad9da65f9964b236d8fd15828f7bcffe42 Mon Sep 17 00:00:00 2001 
From: Youcef Sebiat Date: Tue, 4 Feb 2020 11:17:15 +0100 Subject: [PATCH 04/38] CNAM-438: DrugPrescription Event Transformer. CNAM-438: Correct tests. CNAM-438: Rename variable, add prescriptions to FallTransform. --- .../cnam/etl/events/DrugPrescription.scala | 17 ++++++ .../DrugPrescriptionTransformer.scala | 38 ++++++++++++ .../cmap/cnam/study/fall/FallMain.scala | 33 ++++++++++- .../cnam/study/fall/FallMainTransform.scala | 32 ++++++++++ .../DrugPrescriptionTransformerSuite.scala | 59 +++++++++++++++++++ 5 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/events/DrugPrescription.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformer.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformerSuite.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/DrugPrescription.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/DrugPrescription.scala new file mode 100644 index 00000000..8a6f33df --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/DrugPrescription.scala @@ -0,0 +1,17 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.events + +import java.sql.Timestamp + +/** + * [[Event]] that combines [[Drug]]s to form a Prescription. 
+ */ +trait DrugPrescription extends Dispensation with EventBuilder { + override val category: EventCategory[DrugPrescription] = "drug_prescription" + + def apply(patientID: String, name: String, dosage: Double, groupID: String, date: Timestamp): Event[DrugPrescription] = + Event(patientID, category, groupID, name, dosage, date, None) +} + +object DrugPrescription extends DrugPrescription \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformer.scala new file mode 100644 index 00000000..d311248f --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformer.scala @@ -0,0 +1,38 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.transformers.drugprescription + +import org.apache.spark.sql.Dataset +import fr.polytechnique.cmap.cnam.etl.events.{Drug, DrugPrescription, Event} + +class DrugPrescriptionTransformer extends Serializable { + /** + * Transform DrugPurchases Events to DrugPrescription Events. + * @param drugs [[Dataset]][[Event]][[Drug]] + * @return [[Dataset]][[Event]][[DrugPrescription]] + */ + def transform(drugs: Dataset[Event[Drug]]): Dataset[Event[DrugPrescription]] = { + + val sqlCtx = drugs.sqlContext + import sqlCtx.implicits._ + drugs + .groupByKey(drug => (drug.groupID, drug.patientID, drug.start)) + .mapGroups((_, drugs) => fromDrugs(drugs.toList)) + .distinct() + } + + /** + * Combines [[Drug]] [[Event]] to form an [[Event]] of type [[DrugPrescription]]. + * WARNING: Drug Events must share the same patientID, groupID and start. + * @param drugs Events to be combined. Must share the same patientID, groupID and start. + * @return DrugPrescription Event which value is concatenation of the values of the passed Drugs. 
+ */ + def fromDrugs(drugs: List[Event[Drug]]): Event[DrugPrescription] = { + val first = drugs.head + val value = drugs + .map(_.value) + .sorted + .reduce((l, r) => l.concat("_").concat(r)) + DrugPrescription(first.patientID, value, first.weight, first.groupID, first.start) + } +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala index 33faad92..8aec6d60 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala @@ -5,13 +5,14 @@ package fr.polytechnique.cmap.cnam.study.fall import scala.collection.mutable import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main -import fr.polytechnique.cmap.cnam.etl.events.{Event, FollowUp, Outcome} +import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event, FollowUp, Outcome} import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.patients.Patient import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.etl.transformers.drugprescription.DrugPrescriptionTransformer import fr.polytechnique.cmap.cnam.etl.transformers.exposures.ExposureTransformer import fr.polytechnique.cmap.cnam.etl.transformers.interaction.NLevelInteractionTransformer import fr.polytechnique.cmap.cnam.study.fall.codes._ @@ -184,6 +185,34 @@ object FallMain extends Main with FractureCodes { ) } + val prescriptions = new DrugPrescriptionTransformer().transform(optionDrugPurchases.get).cache() + + operationsMetadata += { + OperationReporter + .report( + "prescriptions", + List("drug_purchases"), + OperationTypes.Dispensations, + prescriptions.toDF, 
+ Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + } + + val prescriptionsExposures = new ExposureTransformer(definition) + .transform(patientsWithFollowUp.map(_._2).distinct())(prescriptions.as[Event[Drug]]).cache() + operationsMetadata += { + OperationReporter + .report( + "prescriptions_exposures", + List("prescriptions", "follow_up"), + OperationTypes.Exposures, + prescriptionsExposures.toDF, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + } + new ExposureTransformer(definition) .transform(patientsWithFollowUp.map(_._2).distinct())(optionDrugPurchases.get) } @@ -211,6 +240,8 @@ object FallMain extends Main with FractureCodes { fallConfig.output.saveMode ) } + + } operationsMetadata diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala index 6e69ce6e..8968a115 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala @@ -8,6 +8,7 @@ import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.etl.transformers.drugprescription.DrugPrescriptionTransformer import fr.polytechnique.cmap.cnam.etl.transformers.exposures.ExposureTransformer import fr.polytechnique.cmap.cnam.etl.transformers.interaction.NLevelInteractionTransformer import fr.polytechnique.cmap.cnam.study.fall.codes._ @@ -111,6 +112,37 @@ object FallMainTransform extends Main with FractureCodes { fallConfig.output.saveMode ) } + + val prescriptions = new DrugPrescriptionTransformer().transform(drugPurchases).cache() + + meta += { + "prescriptions" -> + OperationReporter + .report( + "prescriptions", + List("drug_purchases"), + 
OperationTypes.Dispensations, + prescriptions.toDF, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + } + + val prescriptionsExposures = new ExposureTransformer(definition) + .transform(patientsWithFollowUp.map(_._2).distinct())(prescriptions.as[Event[Drug]]).cache() + meta += { + "prescriptions_exposures" -> + OperationReporter + .report( + "prescriptions_exposures", + List("prescriptions", "follow_up"), + OperationTypes.Exposures, + prescriptionsExposures.toDF, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + } + new ExposureTransformer(definition).transform(patientsWithFollowUp.map(_._2))(drugPurchases).cache() } val exposuresReport = OperationReporter.reportAsDataSet( diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformerSuite.scala new file mode 100644 index 00000000..cf809b93 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformerSuite.scala @@ -0,0 +1,59 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.transformers.drugprescription + +import org.apache.spark.sql.Dataset +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.events.{Drug, DrugPrescription, Event} +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class DrugPrescriptionTransformerSuite extends SharedContext { + + "transform" should "combine Drugs that has the same groupID to form a DrugPrescription" in { + // Given + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + //Given + + val input: Dataset[Event[Drug]] = Seq( + Drug("patient", "CITALOPRAM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 1, 8)), + Drug("patient", "ZOLPIDEM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", 
makeTS(2014, 1, 8)), + Drug("patient", "CITALOPRAM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 3, 12)), + Drug("patient", "ZOLPIDEM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 3, 12)), + Drug("patient", "TIAPRIDE", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 3, 12)) + ).toDS + + val transformer = new DrugPrescriptionTransformer() + + val expected: Dataset[Event[DrugPrescription]] = Seq[Event[DrugPrescription]]( + DrugPrescription("patient", "CITALOPRAM_ZOLPIDEM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 1, 8)), + DrugPrescription("patient", "CITALOPRAM_TIAPRIDE_ZOLPIDEM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 3, 12)) + ).toDS + + val result = transformer.transform(input) + + assertDSs(expected.as[Event[Drug]], result.as[Event[Drug]], true) + + } + + "fromDrugs" should "combine Drugs to form a DrugPrescription"in { + //Given + val input: List[Event[Drug]] = List( + Drug("patient", "CITALOPRAM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 1, 8)), + Drug("patient", "ZOLPIDEM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 1, 8)) + ) + + val transformer = new DrugPrescriptionTransformer() + + val expected: Event[DrugPrescription] = + DrugPrescription("patient", "CITALOPRAM_ZOLPIDEM", 2,"MjAxNC0wOS0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMxMDBfMTc0OQ==", makeTS(2014, 1, 8)) + + + val result = transformer.fromDrugs(input) + + assertResult(expected)(result) + + } + +} From 7b6c96d97642f3fdd0895fd608f057cbad329d8d Mon Sep 17 00:00:00 2001 From: Dian SUN Date: Tue, 4 Feb 2020 16:22:36 +0100 Subject: [PATCH 05/38] CNAM384: trackloss to transformer --- project/build.properties | 2 +- .../extractors/tracklosses/Tracklosses.scala | 65 ----------- .../tracklosses/TracklossesConfig.scala | 12 -- 
.../tracklosses/TracklossTransformer.scala | 37 +++++++ .../tracklosses/TracklossesConfig.scala | 13 +++ .../study/pioglitazone/PioglitazoneMain.scala | 4 +- .../rosiglitazone/RosiglitazoneMain.scala | 4 +- .../tracklosses/TracklossesSuite.scala | 104 ------------------ .../TracklossTransformerSuite.scala | 35 ++++++ 9 files changed, 90 insertions(+), 186 deletions(-) delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/Tracklosses.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/TracklossesConfig.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossTransformer.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossesConfig.scala delete mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/TracklossesSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossTransformerSuite.scala diff --git a/project/build.properties b/project/build.properties index 64317fda..8e682c52 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=0.13.15 +sbt.version=0.13.18 diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/Tracklosses.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/Tracklosses.scala deleted file mode 100644 index 5fb7e0e4..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/Tracklosses.scala +++ /dev/null @@ -1,65 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.tracklosses - -import java.sql.Timestamp -import org.apache.spark.sql.expressions.Window -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.TimestampType -import org.apache.spark.sql.{Column, DataFrame, Dataset} -import fr.polytechnique.cmap.cnam.etl.events.{Event, 
Trackloss} -import fr.polytechnique.cmap.cnam.etl.sources.Sources - -class Tracklosses(config: TracklossesConfig) { - - import Tracklosses._ - - def extract(sources: Sources): Dataset[Event[Trackloss]] = { - - val dcir: DataFrame = sources.dcir.get - - import dcir.sqlContext.implicits._ - dcir.select(inputColumns: _*) - .filter(col("drug").isNotNull) - .select(col("patientID"), col("eventDate")) - .distinct - .withInterval(config.studyEnd) - .filterTrackLosses(config.emptyMonths) - .withTrackLossDate(config.tracklossMonthDelay) - .map(e => Trackloss(e.getAs[String]("patientID"), - e.getAs[Timestamp]("tracklossDate"))) - } -} - -object Tracklosses { - - val inputColumns: List[Column] = List( - col("NUM_ENQ").as("patientID"), - coalesce( - col("ER_PHA_F__PHA_PRS_IDE"), - col("ER_PHA_F__PHA_PRS_C13") - ).as("drug"), - col("EXE_SOI_DTD").as("eventDate") - ) - - implicit class TracklossesDataFrame(data: DataFrame) { - - def withInterval(lastDate: Timestamp): DataFrame = { - val window = Window.partitionBy(col("patientID")).orderBy(col("eventDate").asc) - data - .withColumn("nextDate", lead(col("eventDate"), 1, lastDate).over(window)) - .filter(col("nextDate").isNotNull) - .withColumn("interval", months_between(col("nextDate"), col("eventDate")).cast("int")) - .drop(col("nextDate")) - } - - def filterTrackLosses(emptyMonths: Int): DataFrame = { - data.filter(col("interval") >= emptyMonths) - } - - def withTrackLossDate(tracklossMonthDelay: Int): DataFrame = { - data.withColumn("tracklossDate", add_months(col("eventDate"), tracklossMonthDelay).cast(TimestampType)) - } - } - -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/TracklossesConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/TracklossesConfig.scala deleted file mode 100644 index c7daa5c9..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/TracklossesConfig.scala +++ /dev/null @@ -1,12 +0,0 @@ -// License: BSD 3 
clause - -package fr.polytechnique.cmap.cnam.etl.extractors.tracklosses - -import java.sql.Timestamp -import fr.polytechnique.cmap.cnam.etl.config.CaseClassConfig - -case class TracklossesConfig( - studyEnd: Timestamp, - emptyMonths: Int = 4, - tracklossMonthDelay: Int = 2) - extends CaseClassConfig diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossTransformer.scala new file mode 100644 index 00000000..d6d6ef50 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossTransformer.scala @@ -0,0 +1,37 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.transformers.tracklosses + +import me.danielpes.spark.datetime.implicits._ +import org.apache.spark.sql.Dataset +import fr.polytechnique.cmap.cnam.etl.events.{Dispensation, Event, Trackloss} +import fr.polytechnique.cmap.cnam.util.functions._ + +class TracklossTransformer(config: TracklossesConfig) extends Serializable { + + def transform[T <: Dispensation](drugs: Dataset[Event[T]]): Dataset[Event[Trackloss]] = { + + val sqlCtx = drugs.sqlContext + import sqlCtx.implicits._ + + drugs.groupByKey(_.patientID).flatMapGroups((_, events) => fromDispensationToTrackloss(events)).distinct() + + } + + private def fromDispensationToTrackloss(events: Iterator[Event[Dispensation]]): TraversableOnce[Event[Trackloss]] = { + + val sortedEvents = events.toList.sortBy(_.start) + val lastEvent: Event[Dispensation] = sortedEvents.last.copy(start = config.studyEnd) + + val addMonthDelay = (event: Event[Dispensation]) => Trackloss(event.patientID, (event.start + config.tracklossMonthDelay).get) + + (sortedEvents :+ lastEvent).toStream.sliding(2, 1).filter(isInSlide).map(slide => addMonthDelay(slide.head)) + } + + private def isInSlide(slide: Stream[Event[Dispensation]]): Boolean = { + val reachTS = (slide.head.start + 
config.emptyMonths).get + slide.last.start.after(reachTS) || slide.last.start.equals(reachTS) + } + + +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossesConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossesConfig.scala new file mode 100644 index 00000000..9e1a7f32 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossesConfig.scala @@ -0,0 +1,13 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.transformers.tracklosses + +import java.sql.Timestamp +import me.danielpes.spark.datetime.Period +import me.danielpes.spark.datetime.implicits._ +import fr.polytechnique.cmap.cnam.etl.config.CaseClassConfig + +case class TracklossesConfig( + studyEnd: Timestamp, + emptyMonths: Period = 4.months, + tracklossMonthDelay: Period = 2.months) extends CaseClassConfig diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala index 38b73769..c9988219 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala @@ -11,7 +11,6 @@ import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.molecules.MoleculePurchases import fr.polytechnique.cmap.cnam.etl.extractors.patients.Patients -import fr.polytechnique.cmap.cnam.etl.extractors.tracklosses.{Tracklosses, TracklossesConfig} import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.patients.Patient @@ -19,6 +18,7 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources import 
fr.polytechnique.cmap.cnam.etl.transformers.exposures.ExposureTransformer import fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformer import fr.polytechnique.cmap.cnam.etl.transformers.observation.ObservationPeriodTransformer +import fr.polytechnique.cmap.cnam.etl.transformers.tracklosses.{TracklossTransformer, TracklossesConfig} import fr.polytechnique.cmap.cnam.study.pioglitazone.extractors.{Diagnoses, MedicalActs} import fr.polytechnique.cmap.cnam.study.pioglitazone.outcomes._ import fr.polytechnique.cmap.cnam.util.datetime.implicits._ @@ -104,7 +104,7 @@ object PioglitazoneMain extends Main { val rawTracklosses = { val tracklossConfig = TracklossesConfig(studyEnd = config.base.studyEnd) - new Tracklosses(tracklossConfig).extract(sources).cache() + new TracklossTransformer(tracklossConfig).transform(rawDrugPurchases).cache() } operationsMetadata += { OperationReporter diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala index 1455c8f3..55660af5 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala @@ -10,7 +10,6 @@ import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.molecules.MoleculePurchases import fr.polytechnique.cmap.cnam.etl.extractors.patients.Patients -import fr.polytechnique.cmap.cnam.etl.extractors.tracklosses.{Tracklosses, TracklossesConfig} import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.patients.Patient @@ -18,6 +17,7 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources import 
fr.polytechnique.cmap.cnam.etl.transformers.exposures.ExposureTransformer import fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformer import fr.polytechnique.cmap.cnam.etl.transformers.observation.ObservationPeriodTransformer +import fr.polytechnique.cmap.cnam.etl.transformers.tracklosses.{TracklossTransformer, TracklossesConfig} import fr.polytechnique.cmap.cnam.study.rosiglitazone.extractors.Diagnoses import fr.polytechnique.cmap.cnam.study.rosiglitazone.outcomes.RosiglitazoneOutcomeTransformer import fr.polytechnique.cmap.cnam.util.Path @@ -119,7 +119,7 @@ object RosiglitazoneMain extends Main { //Extract Trackloss val tracklosses = { val tracklossConfig = TracklossesConfig(studyEnd = config.base.studyEnd) - new Tracklosses(tracklossConfig).extract(sources).cache() + new TracklossTransformer(tracklossConfig).transform(drugPurchases).cache() } operationsMetadata += { OperationReporter diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/TracklossesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/TracklossesSuite.scala deleted file mode 100644 index d330a347..00000000 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/tracklosses/TracklossesSuite.scala +++ /dev/null @@ -1,104 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.tracklosses - -import org.apache.spark.sql.DataFrame -import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.Trackloss -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.functions._ - -class TracklossesSuite extends SharedContext { - - "withInterval" should "add the number of month before the next prescription" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - // Given - val input = Seq( - ("Patient_01", makeTS(2006, 1, 5)), - ("Patient_01", makeTS(2006, 11, 5)), - ("Patient_01", makeTS(2007, 2, 5)) - 
).toDF("patientID", "eventDate") - - val expected = Seq( - ("Patient_01", makeTS(2006, 1, 5), 10), - ("Patient_01", makeTS(2006, 11, 5), 3), - ("Patient_01", makeTS(2007, 2, 5), 34) - ).toDF("patientID", "eventDate", "interval") - - // When - import Tracklosses.TracklossesDataFrame - val result = input.withInterval(makeTS(2009, 12, 31)) - - // Then - assertDFs(result, expected) - } - - "filterTrackLosses" should "remove any line with small interval" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - // Given - val input = Seq( - ("Patient_01", makeTS(2006, 1, 5), 10), - ("Patient_01", makeTS(2006, 11, 5), 3), - ("Patient_01", makeTS(2007, 2, 5), 34) - ).toDF("patientID", "eventDate", "interval") - - val expected = Seq( - ("Patient_01", makeTS(2006, 1, 5), 10), - ("Patient_01", makeTS(2007, 2, 5), 34) - ).toDF("patientID", "eventDate", "interval") - - // When - import Tracklosses.TracklossesDataFrame - val result = input.filterTrackLosses(4) - - // Then - assertDFs(result, expected) - } - - "withTrackLossDate" should "add the date of the trackloss" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - // Given - val input = Seq( - ("Patient_01", makeTS(2006, 1, 5), 10), - ("Patient_01", makeTS(2007, 2, 5), 34) - ).toDF("patientID", "eventDate", "interval") - - val expected = Seq( - ("Patient_01", makeTS(2006, 1, 5), 10, makeTS(2006, 3, 5)), - ("Patient_01", makeTS(2007, 2, 5), 34, makeTS(2007, 4, 5)) - ).toDF("patientID", "eventDate", "interval", "tracklossDate") - - // When - import Tracklosses.TracklossesDataFrame - val result = input.withTrackLossDate(2) - - // Then - assertDFs(result, expected) - } - - "extract" should "return correct result" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - // Given - val config = TracklossesConfig(makeTS(2006, 12, 31)) - val dcir: DataFrame = sqlContext.read.load("src/test/resources/test-input/DCIR.parquet") - val sources = new Sources(dcir = Some(dcir)) - val expected: DataFrame = Seq( - 
Trackloss("Patient_01", makeTS(2006, 3, 30)), - Trackloss("Patient_02", makeTS(2006, 3, 30)) - ).toDF - - // When - val result = new Tracklosses(config).extract(sources) - - // Then - assertDFs(result.toDF(), expected) - } -} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossTransformerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossTransformerSuite.scala new file mode 100644 index 00000000..c2eb6cf4 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/tracklosses/TracklossTransformerSuite.scala @@ -0,0 +1,35 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.transformers.tracklosses + +import org.apache.spark.sql.{DataFrame, Dataset} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.events.{Event, Molecule, Trackloss} +import fr.polytechnique.cmap.cnam.util.functions + +class TracklossTransformerSuite extends SharedContext { + + "transform" should "return correct result" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + // Given + val config = TracklossesConfig(functions.makeTS(2006, 12, 31)) + val drugs: Dataset[Event[Molecule]] = Seq( + Molecule("Patient_01", "3400935418487", 1.0, functions.makeTS(2006, 1, 15)), + Molecule("Patient_01", "3400935418487", 1.0, functions.makeTS(2006, 6, 30)), + Molecule("Patient_02", "3400935563538", 1.0, functions.makeTS(2006, 1, 5)), + Molecule("Patient_02", "3400935563538", 1.0, functions.makeTS(2006, 1, 15)), + Molecule("Patient_02", "3400935563538", 1.0, functions.makeTS(2006, 1, 30)), + Molecule("Patient_02", "3400935563538", 1.0, functions.makeTS(2006, 1, 30)) + ).toDS() + val expected: Dataset[Event[Trackloss]] = Seq( + Trackloss("Patient_01", functions.makeTS(2006, 3, 15)), + Trackloss("Patient_01", functions.makeTS(2006, 8, 30)), + Trackloss("Patient_02", functions.makeTS(2006, 3, 30)) + ).toDS() + //when + val res = new 
TracklossTransformer(config).transform(drugs) + //then + assertDSs(expected, res) + } +} From 85466218ee25f75725785175bf5ae7a49db90a10 Mon Sep 17 00:00:00 2001 From: Angel Francisco Orta Date: Thu, 6 Feb 2020 14:33:35 +0100 Subject: [PATCH 06/38] CNAM-435: Documentation added for extractors CNAM-435: Added Documentation to extractor CNAM-435: Text changed to better understanding CNAM-435:Rewrite some documentation to adapt to guidelines. --- Extractors.md | 72 +++++++++++++++++++ .../cmap/cnam/etl/extractors/Extractor.scala | 32 +++++++++ .../etl/extractors/mco/McoExtractor.scala | 26 +++++++ 3 files changed, 130 insertions(+) create mode 100644 Extractors.md diff --git a/Extractors.md b/Extractors.md new file mode 100644 index 00000000..44683b83 --- /dev/null +++ b/Extractors.md @@ -0,0 +1,72 @@ + + +# Extractors + +Extractors are jobs that extract the required columns from the sources and map them to the `Event` type being extracted. +An extractor is composed of several basic components that are grouped to give all the functionalities to the extractors. +From a hierarchical point of view, we have: + +1) Base elements are traits such as [Extractor](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala), +[McoSource](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala) and +[EventRowExtractor](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala). +Each manages different parts of extraction. They are all necessary for the creation of an element of the next level. 
+2) Intermediate elements are traits that bring together the basic methods to create a common trait to extract the data from the sources (mco, dcir, ssr, had). In our example, +[McoExtractor](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala) +is the base for retrieving the necessary data from the mco source. These elements inherit the base elements and accept as a parameter a trait of the type EventType +3) The upper elements are objects and they are the entry point of the job itself; they inherit from the intermediate elements and specialize them for one type of `Event`. + +Each basic component has a function: + - [Extractor](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala) + is a trait that works by filtering, extracting and building. Its main method `extract` filters the sources and builds a dataset of type `Event`. +```scala +def extract(sources: Sources, codes: Set[String])(implicit ctag: TypeTag[EventType]): Dataset[Event[EventType]] = { + val input: DataFrame = getInput(sources) + import input.sqlContext.implicits._ + { + if (codes.isEmpty) { + input.filter(isInExtractorScope _) + } + else { + input.filter(isInExtractorScope _).filter(isInStudy(codes) _) + } + }.flatMap(builder _).distinct() + } +``` +- [McoSource](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala) +This trait is specific to each source (mco, dcir, ssr, had), containing the values relating to the columns and the methods specific to that source. +- [EventRowExtractor](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala) +This trait contains methods for extracting the fields needed to create an `Event`. 
+ +The intermediate elements implement the required methods and adapt them if necessary. +Two good examples of implementation and modification to suit the specificities of the `Event` type are the `builder` and `extractGroupId` methods. + +```scala +def builder(row: Row): Seq[Event[EventType]] = { + lazy val patientId = extractPatientId(row) + lazy val groupId = extractGroupId(row) + lazy val eventDate = extractStart(row) + lazy val endDate = extractEnd(row) + lazy val weight = extractWeight(row) + + Seq(eventBuilder[EventType](patientId, groupId, code(row), weight, eventDate, endDate)) + } +``` +```scala + override def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.EtaNum) + "_" + + r.getAs[String](ColNames.RsaNum) + "_" + + r.getAs[Int](ColNames.Year).toString + } +``` + +The upper elements are responsible for defining the type of `Event` and must implement at least the `columnName` and `eventBuilder` values and modify them according to their specificity. +For example, see [McoHospitalStaysExtractor](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala) +```scala +object McoHospitalStaysExtractor extends McoExtractor[HospitalStay]{ + + override val columnName: String = ColNames.EndDate + override val eventBuilder: EventBuilder = McoHospitalStay +} +``` + + diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala index 016c59c2..2be85eac 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala @@ -9,14 +9,46 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources trait Extractor[EventType <: AnyEvent] extends Serializable { + /** Allows to check if the Row from the Source is considered in the current Study. 
+ * + * @param codes A set of codes being considered in the Study. + * @param row The row itself. + * @return A boolean value. + */ def isInStudy(codes: Set[String])(row: Row): Boolean + + /** Checks if the passed Row has the information needed to build the Event. + * + * @param row The row itself. + * @return A boolean value. + */ def isInExtractorScope(row: Row): Boolean + /** Builds the Event. + * + * @param row The row itself. + * @return A sequence of events built from the row. + */ def builder(row: Row): Seq[Event[EventType]] + /** Gets and prepares all the needed columns from the Source. + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A dataframe with the needed columns. + */ def getInput(sources: Sources): DataFrame + /** Extracts the Event from the Source. + * + * This function is responsible for gluing together the other parts of the Extractor. + * This method should be considered the unique callable method from a Study perspective. + * + * @param sources Source object [[Sources]] that contains all sources. + * @param codes A set of codes passed through the method. + * @param ctag An implicit parameter taken from the EventType type. + * @return A dataset of Events. + */ def extract(sources: Sources, codes: Set[String])(implicit ctag: TypeTag[EventType]): Dataset[Event[EventType]] = { val input: DataFrame = getInput(sources) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala index e50ce46d..773397a0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala @@ -32,19 +32,45 @@ trait McoExtractor[EventType <: AnyEvent] extends Extractor[EventType] with McoS Seq(eventBuilder[EventType](patientId, groupId, code(row), weight, eventDate, endDate)) } + /** Extracts the tracked value. + * + * @return A string value. 
+ */ def code = (row: Row) => row.getAs[String](columnName) + /** It gets PatientID value from row. + * + * @param r The row itself. + * @return The value of PatientID. + */ def extractPatientId(r: Row): String = { r.getAs[String](ColNames.PatientID) } + /** Creates an ID that groups Events of different categories + * by concatenating ETA_NUM, RSA_NUM and the YEAR. + * + * @param r The row itself. + * @return The value of groupId. + */ override def extractGroupId(r: Row): String = { r.getAs[String](ColNames.EtaNum) + "_" + r.getAs[String](ColNames.RsaNum) + "_" + r.getAs[Int](ColNames.Year).toString } + /** Extracts the EstimatedStayStart as the start. + * It comes from the method [[McoDataFrame.estimateStayStartTime]]. + * + * @param r The row itself. + * @return The value of EstimatedStayStart. + */ def extractStart(r: Row): Timestamp = r.getAs[Timestamp](NewColumns.EstimatedStayStart) + /** It gets ExitMode from row. + * + * @param r The row itself. + * @return The value of ExitMode. + */ def getExit(r: Row): String = r.getAs[String](ColNames.ExitMode) } From 04d0a5c70e58cfbd74544d9bd0cb3407d73bed45 Mon Sep 17 00:00:00 2001 From: strayMat Date: Fri, 14 Feb 2020 17:59:46 +0100 Subject: [PATCH 07/38] Added an end Date to IrImb diagnosis. 
Added an end Date to IrImb DiagnosisExtractor add documentation and change getEventEnd to cast 1600 dates to None --- .../cmap/cnam/etl/events/Diagnosis.scala | 5 +++ .../diagnoses/ImbDiagnosisExtractor.scala | 42 ++++++++++++++++++- .../diagnoses/ImbDiagnosesSuite.scala | 38 ++++++++++++++--- 3 files changed, 78 insertions(+), 7 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala index 7707eee5..f21f34c0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala @@ -13,6 +13,10 @@ trait Diagnosis extends AnyEvent with EventBuilder { Event(patientID, category, groupID = "NA", code, 0.0, date, None) } + def apply(patientID: String, code: String, date: Timestamp, endDate: Option[Timestamp]): Event[Diagnosis] = { + Event(patientID, category, groupID = "NA", code, 0.0, date, endDate) + } + def apply(patientID: String, groupID: String, code: String, date: Timestamp): Event[Diagnosis] = { Event(patientID, category, groupID, code, 0.0, date, None) } @@ -20,6 +24,7 @@ trait Diagnosis extends AnyEvent with EventBuilder { def apply(patientID: String, groupID: String, code: String, weight: Double, date: Timestamp): Event[Diagnosis] = { Event(patientID, category, groupID, code, weight, date, None) } + } object McoMainDiagnosis extends Diagnosis { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosisExtractor.scala index a26452df..8b853cb5 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosisExtractor.scala @@ -2,12 +2,19 @@ package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses +import 
scala.util.Try + import java.sql.{Date, Timestamp} + import org.apache.spark.sql.{DataFrame, Row} + import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event, ImbDiagnosis} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.datetime +import fr.polytechnique.cmap.cnam.util.datetime.implicits._ +import fr.polytechnique.cmap.cnam.util.functions.makeTS + object ImbDiagnosisExtractor extends Extractor[Diagnosis] with ImbSource { @@ -17,14 +24,21 @@ object ImbDiagnosisExtractor extends Extractor[Diagnosis] with ImbSource { } override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(getCode(row).startsWith(_)) + (row: Row): Boolean = codes.exists(getCode(row).startsWith(_)) override def builder(row: Row): Seq[Event[Diagnosis]] = - Seq(ImbDiagnosis(getPatientID(row), getCode(row), getEventDate(row))) + Seq(ImbDiagnosis(getPatientID(row), getCode(row), getEventDate(row), getEventEnd(row))) override def getInput(sources: Sources): DataFrame = sources.irImb.get } +/** IR_IMB_R contains the Chronic Diseases diagnoses (ALD = Affection Longue Duree) for patients once + * they have been exonerated for all care related to this Chronic Disease. + * It is the medical service of the health insurance that grants this ALD on the proposal of the + * patient's main physician (Medecin Traitant). + * See the [online snds documentation for further details](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#le-dispositif-des-ald) + * + */ trait ImbSource extends Serializable { lazy val getCode = (row: Row) => row.getAs[String](ColNames.Code) @@ -39,11 +53,35 @@ trait ImbSource extends Serializable { row.getAs[Date](ColNames.Date).toTimestamp } + /** + * The End date of the ALD is not always written. It can take the value 1600-01-01 which + * corresponds to a None value (not set) that we convert to None. 
+ * See the CNAM documentation [available here](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#annexe) + * + * @param r + * @return + */ + def getEventEnd(r: Row): Option[Timestamp] = { + Try({ + val rawEndDate = r.getAs[java.util.Date](ColNames.EndDate).toTimestamp + + if (makeTS(1700, 1 ,1).after(rawEndDate)){ + None + } + else { + Some(rawEndDate) + } + }) recover { + case _: NullPointerException => None + } + }.get + final object ColNames extends Serializable { final lazy val PatientID = "NUM_ENQ" final lazy val Encoding = "MED_NCL_IDT" final lazy val Code = "MED_MTF_COD" final lazy val Date = "IMB_ALD_DTD" + final lazy val EndDate = "IMB_ALD_DTF" } } \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala index 2c24adba..b1bc657f 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala @@ -15,7 +15,7 @@ class ImbDiagnosesSuite extends SharedContext { // Given val imb = sqlContext.read.load("src/test/resources/test-input/IR_IMB_R.parquet") - val expected = Seq(ImbDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13))).toDS + val expected = Seq(ImbDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), Some(makeTS(2016, 3, 13)))).toDS val sources = Sources(irImb = Some(imb)) // When @@ -32,14 +32,42 @@ class ImbDiagnosesSuite extends SharedContext { // Given val imb = sqlContext.read.load("src/test/resources/test-input/IR_IMB_R.parquet") val expected = Seq( - ImbDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13)), - ImbDiagnosis("Patient_02", "E11", makeTS(2006, 1, 25)), - ImbDiagnosis("Patient_02", "9999", makeTS(2006, 4, 25)) + ImbDiagnosis("Patient_02", "E11", makeTS(2006, 1, 25), Some(makeTS(2011, 1, 24))), + 
ImbDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), Some(makeTS(2016, 3, 13))), + ImbDiagnosis("Patient_02", "9999", makeTS(2006, 4, 25), Some(makeTS(2016, 4, 25))) ).toDS val sources = Sources(irImb = Some(imb)) // When - val output = ImbDiagnosisExtractor.extract(sources, Set.empty) + val output = ImbDiagnosisExtractor.extract(sources, Set.empty).orderBy($"start".asc) + + // Then + assertDSs(expected, output) + } + + it should "extract all diagnosis events from raw data when an Empty codes is passed even when ir_imb_f is null" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + //val imb = sqlContext.read.load("src/test/resources/test-input/IR_IMB_R_null.parquet") + val imb = Seq( + ("Patient_02", "CIM10", "E11", makeTS(2006, 1, 25), Some(makeTS(2011, 1, 24))), + ("Patient_02", "CIM10", "C67", makeTS(2006, 3, 13), Some(makeTS(1600, 1, 1))), + ("Patient_03", "CIM10", "C67", makeTS(2006, 3, 13), None), + ("Patient_02", "CIM10", "9999", makeTS(2006, 4, 25), Some(makeTS(2016, 4, 25))) + ).toDF("NUM_ENQ", "MED_NCL_IDT", "MED_MTF_COD", "IMB_ALD_DTD", "IMB_ALD_DTF") + + val expected = Seq( + ImbDiagnosis("Patient_02", "E11", makeTS(2006, 1, 25), Some(makeTS(2011, 1, 24))), + ImbDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), None), + ImbDiagnosis("Patient_03", "C67", makeTS(2006, 3, 13), None), + ImbDiagnosis("Patient_02", "9999", makeTS(2006, 4, 25), Some(makeTS(2016, 4, 25))) + ).toDS + + val sources = Sources(irImb = Some(imb)) + // When + val output = ImbDiagnosisExtractor.extract(sources, Set.empty).orderBy($"start".asc) // Then assertDSs(expected, output) From 196693f336c10588611ec782b0438c98e2aaa59c Mon Sep 17 00:00:00 2001 From: Angel Francisco Orta Date: Thu, 13 Feb 2020 13:56:06 +0100 Subject: [PATCH 08/38] CNAM-439: Created markdown for Events objects --- Events.md | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++ Extractors.md | 2 +- 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 Events.md diff --git 
a/Events.md b/Events.md new file mode 100644 index 00000000..511eefbd --- /dev/null +++ b/Events.md @@ -0,0 +1,84 @@ +# Events + +An [Event](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Event.scala) + is a structure that makes it possible to homogenize the content of the output datasets. +It consists, at least, of an object to be instantiated and a trait containing the necessary functions. +In order to create an `Event` you first need to understand what it is. +An event is an occurrence defined by having a patient identifier, category, start, group identifier, value, weight and end; +of these only the first three are mandatory. The 7 elements needed to form an `Event` are: +- patientID: is the patient identifier. +- category: defines the event's category. +- groupID: contains the ID of a group of related events. +- value: contains string values for the molecule name, the diagnosis code, etc. +- weight: contains double values for medical acts weighting or other numerical values. +- start: is the start of the event's period. +- end: is the end of the event's period. +```scala + patientID: String, + category: EventCategory[A], + groupID: String, + value: String, + weight: Double, + start: Timestamp, + end: Option[Timestamp] +``` +All events inherit [AnyEvent](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/AnyEvent.scala) + and [EventBuilder](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/EventBuilder.scala). + `AnyEvent` is a trait with a category value, and `EventBuilder` is a trait to build an `Event`.
+ + +[ObservationPeriod](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ObservationPeriod.scala) +and [MedicalAct](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala) +are good examples to illustrate the construction of an `Event`. +The first one, `ObservationPeriod`, defines the values `patientID`, `category`, `start` and, if it exists, `end`. +The other values are present but use default values. Its trait just assigns the value of the category and uses the apply method to build an `Event`. +```scala + val category: EventCategory[ObservationPeriod] = "observation_period" + + /** Creates an Event object of type ObservationPeriod using a map function to map a dataset. + * + * @param patientID The value patientID from dataset. + * @param start The value start from dataset. + * @param end The value end from dataset. + * @return Event[ObservationPeriod]. + */ + def apply(patientID: String, start: Timestamp, end: Timestamp): Event[ObservationPeriod] = + Event(patientID, category, groupID = "NA", value = "NA", weight = 0D, start, Some(end)) +``` +And its object doesn't add anything, it just inherits the trait. +```scala +object ObservationPeriod extends ObservationPeriod +``` +On the other hand, `MedicalAct` defines values for all elements except `end`. +Its trait uses two apply methods to assign values according to need and does not assign any value to `category`; +it is assigned in each object according to the type.
+ +```scala + + override val category: EventCategory[MedicalAct] + + def apply(patientID: String, groupID: String, code: String, weight: Double, date: Timestamp): Event[MedicalAct] = { + Event(patientID, category, groupID, code, weight, date, None) + } + + def apply(patientID: String, groupID: String, code: String, date: Timestamp): Event[MedicalAct] = { + Event(patientID, category, groupID, code, 0.0, date, None) + } +``` +There are several `MedicalAct` objects, one for each type of medical act. +Each object assigns its category and, in some cases, contains a nested object that stores groupID values. + +```scala +object BiologyDcirAct extends MedicalAct { + override val category: EventCategory[MedicalAct] = "dcir_biology_act" + + object groupID { + val PrivateAmbulatory = "private_ambulatory" + val PublicAmbulatory = "public_ambulatory" + val PrivateHospital = "private_hospital" + val Liberal = "liberal" + val DcirAct = "DCIR_act" + val Unknown = "unknown_source" + } +} +``` \ No newline at end of file diff --git a/Extractors.md b/Extractors.md index 44683b83..38939ce6 100644 --- a/Extractors.md +++ b/Extractors.md @@ -2,7 +2,7 @@ # Extractors -Extractors are a kind of jobs that allows to extract the required columns from the sources and maps them to the `Event` it is extracting. +Extractors are a kind of jobs that allows to extract the required columns from the sources and maps them to the `Event` ([Events](Events.md)) it is extracting. An extractor is composed of several basic components that are grouped to give all the functionalities to the extractors.
From a point of view hierarchical we have: From 38bb81a7843fe9a2362c76d76c4c9745e4edb9c1 Mon Sep 17 00:00:00 2001 From: vjarry Date: Fri, 14 Feb 2020 18:00:14 +0100 Subject: [PATCH 09/38] DREES-106: Adding new Ngap event and extractors --- .../cmap/cnam/etl/events/NgapAct.scala | 39 +++++++++ .../ngapacts/DcirNgapActExtractor.scala | 81 +++++++++++++++++++ .../ngapacts/McoCeNgapActExtractor.scala | 74 +++++++++++++++++ .../ngapacts/NgapActClassConfig.scala | 12 +++ .../extractors/ngapacts/NgapActConfig.scala | 81 +++++++++++++++++++ 5 files changed, 287 insertions(+) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/events/NgapAct.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/NgapAct.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/NgapAct.scala new file mode 100644 index 00000000..daa40ca1 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/NgapAct.scala @@ -0,0 +1,39 @@ +package fr.polytechnique.cmap.cnam.etl.events + +import java.sql.Timestamp +import org.apache.spark.sql.Row + +trait NgapAct extends AnyEvent with EventBuilder { + + override val category: EventCategory[NgapAct] = "ngap_act" + + def apply(patientID: String, groupID: String, ngapCoefficient: String, date: Timestamp): Event[NgapAct] = { + Event(patientID, category, groupID, ngapCoefficient, 0.0, date, None) + } + + def fromRow( + r: Row, + patientIDCol: String = "patientID", + pfsIDCol: String = "groupID", + ngapCoefficient: String = "code", + dateCol: String = "eventDate"): Event[NgapAct] = + 
apply( + r.getAs[String](patientIDCol), + r.getAs[String](pfsIDCol), + r.getAs[String](ngapCoefficient), + r.getAs[Timestamp](dateCol) + ) +} + + +object DcirNgapAct extends NgapAct { + override val category: EventCategory[NgapAct] = "dcir_ngap_act" +} + +object McoCeFbstcNgapAct extends NgapAct { + override val category: EventCategory[NgapAct] = "mco_ce_fbstc_act" +} + +object McoCeFcstcNgapAct extends NgapAct { + override val category: EventCategory[NgapAct] = "mco_ce_fcstc_act" +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala new file mode 100644 index 00000000..4460e4dc --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala @@ -0,0 +1,81 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts + +import java.sql.Timestamp + +import scala.reflect.runtime.universe._ +import scala.util.Try +import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} +import fr.polytechnique.cmap.cnam.etl.events.{DcirNgapAct, Event, EventBuilder, NgapAct} +import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import org.apache.spark.sql.functions.col + +class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[NgapAct] { + + override val columnName: String = ColNames.NaturePrestation + val ngapKeyLetter: String = "PRS_NAT_CB2" + + override val eventBuilder: EventBuilder = DcirNgapAct + + + override def getInput(sources: Sources): DataFrame = { + val neededColumns: List[Column] = List( + ColNames.PatientID, ColNames.NaturePrestation, ColNames.NgapCoefficient, + ColNames.Date, ColNames.ExecPSNum, ColNames.DcirFluxDate, ngapKeyLetter + ).map(colName => col(colName)) + + + lazy val irNat = sources.irNat.get + lazy val dcir = sources.dcir.get + + lazy val df: DataFrame = 
dcir.join(irNat, dcir("PRS_NAT_REF").cast("String") === irNat("PRS_NAT")) + + df.select(neededColumns: _*) + } + + override def isInExtractorScope(row: Row): Boolean = { + !row.isNullAt(row.fieldIndex(ngapKeyLetter)) + } + + override def isInStudy(codes: Set[String])(row: Row): Boolean = { + ngapActsConfig.dcirIsInCategory( + ngapActsConfig.acts_categories, + row + ) + } + + override def code: Row => String = (row: Row) => { + row.getAs[Int](ColNames.NaturePrestation).toString + "_" + + row.getAs[String](ngapKeyLetter) + "_" + + row.getAs[Double](ColNames.NgapCoefficient).toString + } + + override def extractStart(r: Row): Timestamp = { + Try(super.extractStart(r)) recover { + case _: NullPointerException => extractFluxDate(r) + } + }.get + + override def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.ExecPSNum) + } + + override def extract( + sources: Sources, + codes: Set[String]) + (implicit ctag: TypeTag[NgapAct]): Dataset[Event[NgapAct]] = { + + val input: DataFrame = getInput(sources) + + import input.sqlContext.implicits._ + + { + if (ngapActsConfig.acts_categories.isEmpty) { + input.filter(isInExtractorScope _) + } + else { + input.filter(isInExtractorScope _).filter(isInStudy(codes) _) + } + }.flatMap(builder _).distinct() + } +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala new file mode 100644 index 00000000..dbdf8680 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala @@ -0,0 +1,74 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts + +import fr.polytechnique.cmap.cnam.etl.events.{Event, EventBuilder, McoCeFbstcNgapAct, McoCeFcstcNgapAct, NgapAct} +import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import 
org.apache.spark.sql.{DataFrame, Dataset, Row} + +import scala.reflect.runtime.universe._ +import scala.util.Try + +trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { + val ngapActsConfig: NgapActConfig + val keyLetterColumn: String + val coeffColumn: String + + val columnName: String = keyLetterColumn + + override def isInStudy(codes: Set[String])(row: Row): Boolean = { + ngapActsConfig.pmsiIsInCategories( + ngapActsConfig.acts_categories, + keyLetterColumn, + coeffColumn, + row + ) + } + + override def code: Row => String = (row: Row) => { + val coeff = Try(row.getAs[Double](coeffColumn).toString) recover { + case _: NullPointerException => "0" + } + "PmsiCe_" + row.getAs[String](keyLetterColumn) + "_" + coeff.get + } + + override def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.EtaNum) + "_" + + r.getAs[String](ColNames.SeqNum) + "_" + + r.getAs[Int](ColNames.Year).toString + } + + override def extract( + sources: Sources, + codes: Set[String]) + (implicit ctag: TypeTag[NgapAct]): Dataset[Event[NgapAct]] = { + + val input: DataFrame = getInput(sources) + + import input.sqlContext.implicits._ + + { + if (ngapActsConfig.acts_categories.isEmpty) { + input.filter(isInExtractorScope _) + } + else { + input.filter(isInExtractorScope _).filter(isInStudy(codes) _) + } + }.flatMap(builder _).distinct() + } +} + +class McoCeFbstcNgapActExtractor(ngapConfig: NgapActConfig) extends McoCeNgapActExtractor { + val ngapActsConfig: NgapActConfig = ngapConfig + val keyLetterColumn: String = ColNames.NgapKeyLetterFbstc + val coeffColumn: String = ColNames.NgapCoefficientFbstc + override val columnName: String = keyLetterColumn + override val eventBuilder: EventBuilder = McoCeFbstcNgapAct +} + +class McoCeFcstcNgapActExtractor(ngapConfig: NgapActConfig) extends McoCeNgapActExtractor { + val ngapActsConfig: NgapActConfig = ngapConfig + val keyLetterColumn: String = ColNames.NgapKeyLetterFcstc + val coeffColumn: String = ColNames.NgapCoefficientFcstc + 
override val columnName: String = keyLetterColumn + override val eventBuilder: EventBuilder = McoCeFcstcNgapAct +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala new file mode 100644 index 00000000..3f7949a7 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala @@ -0,0 +1,12 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts + +trait NgapActClassConfig extends Serializable { + """ + ngapCoefficients should always be specified with the dot separation for float, as this is how they are coded in the snds. + eg: "2.0" should be used instead of "2" + """.stripMargin + //val name: String + val ngapKeyLetters: Seq[String] + val ngapCoefficients: Seq[String] + val ngapPrsNatRefs: Seq[String] = Seq() +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala new file mode 100644 index 00000000..5023d2fe --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala @@ -0,0 +1,81 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts + +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig + +import scala.util.Try +import org.apache.spark.sql.Row + +/** + * NgapActConfig defines three different ways to filter for specific ngap acts in the SNDS : + * The base configuration is NgapActClassConfig which can filters on : + * - prestation type (ngapPrsNatRefs: PRS_NAT_REF), + * - prestation coefficient (ngapKeyLetters : PRS_NAT_CB2 or ACT_COD in the PMSI_CE), + * - prestation coefficient (ngapCoefficients: PRS_ACT_CFT or ACT_COE in the PMSI_CE) + * **Note**: If acts_categories is empty, all ngap acts are extracted. 
+ * The Ngap acts can be found in two sources. The filtering logic differs depending on the source. + * + * In the Dcir, search where ngapKeyLetter is available (i.e. TODO what proportion in echantillon 2008-2016): + * - If a list of ngapPrsNatRefs is given, it extracts all of these PrsNatRef + * - if a list of ngapKeyLetters and a list of ngapCoefficients is given, it extracts all combinations of (keyLetter, coefficient) + * + * In the Pmsi (only McoCe implemented, less than 12000 ngap acts per year in SSR_CE), + * search where ngapCoefficient is available + * - if a list of ngapKeyLetters and a list of ngapCoefficients is given, it extracts all combinations of (keyLetter, coefficient) + * - if the list of ngapCoefficients is empty, it extracts all acts whose key letter is in ngapKeyLetters, whatever their coefficient + * @param acts_categories List of configurations to get specific NgapActs + */ +class NgapActConfig( + val acts_categories: List[NgapActClassConfig]) extends ExtractorConfig with Serializable { + + def dcirIsInCategory( + categories: List[NgapActClassConfig], + row: Row): Boolean = { + + val ngapKeyLetter : String = row.getAs[String]("PRS_NAT_CB2") + val ngapCoefficient : String = row.getAs[Double]("PRS_ACT_CFT").toString + val prsNatRef: String = row.getAs[Int]("PRS_NAT_REF").toString + + categories + .exists(category => + (category.ngapKeyLetters.contains(ngapKeyLetter) && + category.ngapCoefficients.contains(ngapCoefficient)) || + category.ngapPrsNatRefs.contains(prsNatRef) + ) + } + + def pmsiIsInCategories( + categories: List[NgapActClassConfig], + ngapKeyColumn: String, + ngapCoeffColumn: String, + row: Row): Boolean = { + + val letter = row.getAs[String](ngapKeyColumn) + val coeff = Try(row.getAs[Double](ngapCoeffColumn).toString) recover { + case _: NullPointerException => "0" + } + + categories + .exists(category => pmsiIsInCategory(category, letter, coeff.get)) + } + + def pmsiIsInCategory( + category: NgapActClassConfig, + ngapLetter: String, + ngapCoeff: String): Boolean = { +
if (category.ngapCoefficients.isEmpty) { + category.ngapKeyLetters.contains(ngapLetter) + } + else { + category.ngapCoefficients.contains(ngapCoeff) && + category.ngapKeyLetters.contains(ngapLetter) + } + } +} + +object NgapActConfig { + def apply(acts_categories: List[NgapActClassConfig]): NgapActConfig= new NgapActConfig( + acts_categories + ) + +} \ No newline at end of file From efab0b0f4279fc129094fc6edf316e1bd7c793f7 Mon Sep 17 00:00:00 2001 From: vjarry Date: Fri, 14 Feb 2020 18:29:19 +0100 Subject: [PATCH 10/38] DREES-106: Modifications in the extractor logics in PMSI for NGAP and practitioner speciality extractions --- .../extractors/acts/SsrCeActExtractor.scala | 45 ++++++++++++++++++ .../etl/extractors/mcoCe/McoCeExtractor.scala | 47 +++++++++++++++++++ .../etl/extractors/mcoCe/McoCeSource.scala | 32 +++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala new file mode 100644 index 00000000..6a691b68 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala @@ -0,0 +1,45 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.acts + +import java.sql.{Date, Timestamp} +import org.apache.spark.sql.{DataFrame, Row, functions} +import fr.polytechnique.cmap.cnam.etl.events.{Event, SsrCEAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.Extractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.datetime.implicits._ + +object SsrCeActExtractor extends Extractor[MedicalAct] 
with SsrCeSourceExtractor { + override def isInStudy(codes: Set[String]) + (row: Row): Boolean = codes.exists(getCode(row).startsWith(_)) + + override def isInExtractorScope(row: Row): Boolean = !isNullAt(ColNames.CamCode)(row) + + override def builder(row: Row): Seq[Event[MedicalAct]] = { + lazy val patientID = getPatientID(row) + lazy val date = getDate(row) + lazy val code = getCode(row) + // todo + tard, on peut recuperer le ETA_NUM + numero du pfs de santé à la place de ACE pr groupID + Seq(SsrCEAct(patientID, "ACE", code, date)) + } + + override def getInput(sources: Sources): DataFrame = + sources.ssrCe.get.select(ColNames.all.map(functions.col): _*) +} + +trait SsrCeSourceExtractor { + + def getPatientID(row: Row): String = row.getAs[String](ColNames.PatientID) + + def getDate(row: Row): Timestamp = row.getAs[Date](ColNames.Date).toTimestamp + + def getCode(row: Row): String = row.getAs[String](ColNames.CamCode) + + def isNullAt(colName: String)(row: Row): Boolean = row.isNullAt(row.fieldIndex(colName)) + + final object ColNames extends Serializable { + final lazy val PatientID = "NUM_ENQ" + final lazy val CamCode = "SSR_FMSTC__CCAM_COD" + final lazy val Date = "EXE_SOI_DTD" + final lazy val all = List(PatientID, CamCode, Date) + } + +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala new file mode 100644 index 00000000..9e8c4652 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala @@ -0,0 +1,47 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe + +import java.sql.Timestamp +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.{DataFrame, Row} +import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} +import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} +import 
fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.datetime.implicits._ + + +trait McoCeExtractor[EventType <: AnyEvent] extends Extractor[EventType] with McoCeSource with EventRowExtractor { + + val columnName: String + + val eventBuilder: EventBuilder + + def getInput(sources: Sources): DataFrame = sources.mcoCe.get.select(ColNames.all.map(col): _*) + + def isInStudy(codes: Set[String]) + (row: Row): Boolean = codes.exists(code(row).startsWith(_)) + + def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) + + def builder(row: Row): Seq[Event[EventType]] = { + lazy val patientId = extractPatientId(row) + lazy val groupId = extractGroupId(row) + lazy val eventDate = extractStart(row) + lazy val endDate = extractEnd(row) + lazy val weight = extractWeight(row) + + Seq(eventBuilder[EventType](patientId, groupId, code(row), weight, eventDate, endDate)) + } + + def code = (row: Row) => row.getAs[String](columnName) + + def extractPatientId(r: Row): String = { + r.getAs[String](ColNames.PatientID) + } + + override def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.EtaNum) + "_" + + r.getAs[String](ColNames.SeqNum) + "_" + r.getAs[Int](ColNames.Year).toString + } + + def extractStart(r: Row): Timestamp = r.getAs[Timestamp](ColNames.Date).toTimestamp +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala new file mode 100644 index 00000000..cee6a3d5 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala @@ -0,0 +1,32 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe + +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{LongType, TimestampType} +import org.apache.spark.sql.{Column, DataFrame} +import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames +import 
fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp + +trait McoCeSource extends ColumnNames { + + final object ColNames extends Serializable { + val PatientID: ColName = "NUM_ENQ" + val EtaNum: ColName = "ETA_NUM" + val SeqNum : ColName = "SEQ_NUM" + val CamCode = "MCO_FMSTC__CCAM_COD" + val Date = "EXE_SOI_DTD" + val NgapKeyLetterFbstc = "MCO_FBSTC__ACT_COD" + val NgapCoefficientFbstc = "MCO_FBSTC__ACT_COE" + val PractitionnerSpecialtyFbstc = "MCO_FBSTC__EXE_SPE" + val NgapKeyLetterFcstc = "MCO_FCSTC__ACT_COD" + val NgapCoefficientFcstc = "MCO_FCSTC__ACT_COE" + val PractitionnerSpecialtyFcstc = "MCO_FCSTC__EXE_SPE" + val Year = "year" + + val all = List( + PatientID, EtaNum, SeqNum, Year, CamCode, Date, + NgapKeyLetterFbstc, NgapCoefficientFbstc, PractitionnerSpecialtyFbstc, + NgapKeyLetterFcstc, NgapCoefficientFcstc, PractitionnerSpecialtyFcstc + ) + } + +} From f734a4a1e6061c5b28238a0307a6f42a62e96726 Mon Sep 17 00:00:00 2001 From: vjarry Date: Fri, 14 Feb 2020 18:30:21 +0100 Subject: [PATCH 11/38] DREES-106: Dealing with SSR and MCO CE extractors logic --- .../extractors/acts/McoCeActExtractor.scala | 48 +++---------------- .../etl/extractors/dcir/DcirExtractor.scala | 13 ++++- .../etl/extractors/mco/McoExtractor.scala | 10 ++-- 3 files changed, 25 insertions(+), 46 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala index 0793353c..7abf6609 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala @@ -1,46 +1,12 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.acts - -import java.sql.{Date, Timestamp} -import org.apache.spark.sql.{DataFrame, Row, functions} -import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCEAct, MedicalAct} -import 
fr.polytechnique.cmap.cnam.etl.extractors.Extractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.datetime.implicits._ - -object McoCeActExtractor extends Extractor[MedicalAct] with McoCeSourceExtractor { - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(getCode(row).startsWith(_)) - - override def isInExtractorScope(row: Row): Boolean = !isNullAt(ColNames.CamCode)(row) +// License: BSD 3 clause - override def builder(row: Row): Seq[Event[MedicalAct]] = { - lazy val patientID = getPatientID(row) - lazy val date = getDate(row) - lazy val code = getCode(row) - - Seq(McoCEAct(patientID, "ACE", code, date)) - } - - override def getInput(sources: Sources): DataFrame = - sources.mcoCe.get.select(ColNames.all.map(functions.col): _*) -} - - -trait McoCeSourceExtractor { - - def getPatientID(row: Row): String = row.getAs[String](ColNames.PatientID) - - def getDate(row: Row): Timestamp = row.getAs[Date](ColNames.Date).toTimestamp - - def getCode(row: Row): String = row.getAs[String](ColNames.CamCode) +package fr.polytechnique.cmap.cnam.etl.extractors.acts - def isNullAt(colName: String)(row: Row): Boolean = row.isNullAt(row.fieldIndex(colName)) - final object ColNames extends Serializable { - final lazy val PatientID = "NUM_ENQ" - final lazy val CamCode = "MCO_FMSTC__CCAM_COD" - final lazy val Date = "EXE_SOI_DTD" - final lazy val all = List(PatientID, CamCode, Date) - } +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCEAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor +object McoCeActExtractor extends McoCeExtractor[MedicalAct] { + val columnName: String = ColNames.CamCode + override val eventBuilder: EventBuilder = McoCEAct } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala index a70699df..f67e45eb 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala @@ -4,6 +4,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.dcir import java.sql.Timestamp import org.apache.commons.codec.binary.Base64 +import scala.util.Try import org.apache.spark.sql.functions.col import org.apache.spark.sql.{DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} @@ -40,7 +41,17 @@ trait DcirExtractor[EventType <: AnyEvent] extends Extractor[EventType] with Dci r.getAs[String](ColNames.PatientID) } - def extractStart(r: Row): Timestamp = r.getAs[java.util.Date](ColNames.Date).toTimestamp + /** Trying to catch unknown dates (example : IJ = Indemnité Journalière) + * + * @param r The Row object itself + * @return The date of the event or the flux date if it doesn't exist + */ + def extractStart(r: Row): Timestamp = { + Try(r.getAs[java.util.Date](ColNames.Date).toTimestamp) recover { + case _: NullPointerException => extractFluxDate(r) + } + }.get + def extractFluxDate(r: Row): Timestamp = r.getAs[java.util.Date](ColNames.DcirFluxDate).toTimestamp diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala index 773397a0..54270321 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala @@ -3,11 +3,12 @@ package fr.polytechnique.cmap.cnam.etl.extractors.mco import java.sql.Timestamp -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} + import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources +import 
org.apache.spark.sql.functions.col +import org.apache.spark.sql.{DataFrame, Row} trait McoExtractor[EventType <: AnyEvent] extends Extractor[EventType] with McoSource with EventRowExtractor { @@ -17,8 +18,9 @@ trait McoExtractor[EventType <: AnyEvent] extends Extractor[EventType] with McoS def getInput(sources: Sources): DataFrame = sources.mco.get.select(ColNames.all.map(col): _*).estimateStayStartTime - def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(code(row).startsWith(_)) + def isInStudy(codes: Set[String])(row: Row): Boolean = { + codes.exists(code(row).startsWith(_)) + } def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) From 3c06cc5bca192ca2f6b1f2e6ec436ad7413f01f7 Mon Sep 17 00:00:00 2001 From: vjarry Date: Mon, 17 Feb 2020 10:47:21 +0100 Subject: [PATCH 12/38] DREES-106: Adding practitioner speciality extraction --- .../events/PractionnerClaimSpeciality.scala | 18 +++++- ...PractitionerClaimSpecialityExtractor.scala | 63 ++++++++++++++++++- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala index ea78b704..1530d83e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala @@ -20,4 +20,20 @@ object MedicalPractitionerClaim extends PractitionerClaimSpeciality { object NonMedicalPractitionerClaim extends PractitionerClaimSpeciality { override val category: EventCategory[PractitionerClaimSpeciality] = "non_medical_practitioner_claim" -} \ No newline at end of file +} + +object McoCeFbstcMedicalPractitionerClaim extends PractitionerClaimSpeciality { + override val category: EventCategory[PractitionerClaimSpeciality] = "mco_ce__fbstc_practitioner_claim" +} + +object 
McoCeFcstcMedicalPractitionerClaim extends PractitionerClaimSpeciality { + override val category: EventCategory[PractitionerClaimSpeciality] = "mco_ce__fcstc_practitioner_claim" +} + +object SsrCeFbstcMedicalPractitionerClaim extends PractitionerClaimSpeciality { + override val category: EventCategory[PractitionerClaimSpeciality] = "ssr_ce__fbstc_practitioner_claim" +} + +object SsrCeFcstcMedicalPractitionerClaim extends PractitionerClaimSpeciality { + override val category: EventCategory[PractitionerClaimSpeciality] = "ssr_ce__fcstc_practitioner_claim" +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala index 4bcdd724..95dab891 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala @@ -3,11 +3,19 @@ package fr.polytechnique.cmap.cnam.etl.extractors.prestations import java.sql.Timestamp -import scala.util.Try -import org.apache.spark.sql.Row + import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor +import org.apache.spark.sql.Row +import scala.util.Try + +/** + * Get specialties of medical practitionner in the Dcir: + * If a specialty is available, it extracts the specialty using PSE_SPE_COD and the practitioner + * identifier from the database. 
+ */ object MedicalPractitionerClaimExtractor extends DcirExtractor[PractitionerClaimSpeciality] { override val columnName: String = ColNames.MSpe override val eventBuilder: EventBuilder = MedicalPractitionerClaim @@ -23,8 +31,20 @@ object MedicalPractitionerClaimExtractor extends DcirExtractor[PractitionerClaim override def extractGroupId(r: Row): String = { r.getAs[String](ColNames.ExecPSNum) } + + override def isInStudy(codes: Set[String]) + (row: Row): Boolean = codes.contains(code(row)) + + override def isInExtractorScope(row: Row): Boolean = { + (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) + } } +/** + * Get specialties of the non medical practitionners in the Dcir: + * If a specialty is available, it extracts the specialty using PSE_ACT_NAT and the practitioner + * identifier from the database. + */ object NonMedicalPractitionerClaimExtractor extends DcirExtractor[PractitionerClaimSpeciality] { override val columnName: String = ColNames.NonMSpe override val eventBuilder: EventBuilder = NonMedicalPractitionerClaim @@ -40,4 +60,43 @@ object NonMedicalPractitionerClaimExtractor extends DcirExtractor[PractitionerCl override def extractGroupId(r: Row): String = { r.getAs[String](ColNames.ExecPSNum) } + + override def isInExtractorScope(row: Row): Boolean = { + (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) + } + + override def isInStudy(codes: Set[String]) + (row: Row): Boolean = codes.contains(code(row)) +} + + +/** + * Get specialties of the non medical practitioners in the MCO_CE: + * If a specialty is available, it extracts the specialty using MCO_FBSTC_ _EXE_SPE and MCO_FCSTC_ _EXE_SPE. 
+ * These two columns are complementary as described here : + * https://documentation-snds.health-data-hub.fr/fiches/actes_consult_externes.html#les-tables-du-pmsi-version-snds-pour-les-ace + **/ +trait McoCeSpecialtyExtractor extends McoCeExtractor[PractitionerClaimSpeciality] { + override val eventBuilder: EventBuilder = MedicalPractitionerClaim + + override def code: Row => String = (row: Row) => row.getAs[Int](columnName).toString + + + override def isInStudy(codes: Set[String]) + (row: Row): Boolean = codes.contains(code(row)) + + override def isInExtractorScope(row: Row): Boolean = { + (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) + } +} + +object McoCeFbstcSpecialtyExtractor extends McoCeSpecialtyExtractor { + override val columnName: String = ColNames.PractitionnerSpecialtyFbstc + override val eventBuilder: EventBuilder = McoCeFbstcMedicalPractitionerClaim +} + + +object McoCeFcstcSpecialtyExtractor extends McoCeSpecialtyExtractor { + override val columnName: String = ColNames.PractitionnerSpecialtyFcstc + override val eventBuilder: EventBuilder = McoCeFcstcMedicalPractitionerClaim } From 9f95326bcc5ef342d814e59442e65a79efa51b21 Mon Sep 17 00:00:00 2001 From: vjarry Date: Mon, 17 Feb 2020 09:58:32 +0100 Subject: [PATCH 13/38] DREES-106: Adding documentation on the sources. Adding IR_NAT_REF referentiel. Revert "Adding documentation on the sources. Adding IR_NAT_REF referentiel." 
Those changes should be in the Sources modification Merge Request for clarity Adding IR_NAT_REF for Nap extractor --- .../cnam/etl/config/study/StudyConfig.scala | 3 ++- .../cmap/cnam/etl/sources/Sources.scala | 19 ++++++++++++++----- .../cnam/etl/sources/value/IrNatSource.scala | 7 +++++++ 3 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/value/IrNatSource.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/config/study/StudyConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/config/study/StudyConfig.scala index b92f496b..4df4c563 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/config/study/StudyConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/config/study/StudyConfig.scala @@ -12,12 +12,13 @@ object StudyConfig { dcir: Option[String] = None, mco: Option[String] = None, mcoCe: Option[String] = None, - ssr: Option[List[String]] = None, + ssr: Option[String] = None, ssrCe: Option[String] = None, had: Option[String] = None, irBen: Option[String] = None, irImb: Option[String] = None, irPha: Option[String] = None, + irNat: Option[String] = None, dosages: Option[String] = None) case class OutputPaths( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala index 67097fb8..720b3f91 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala @@ -5,8 +5,8 @@ package fr.polytechnique.cmap.cnam.etl.sources import java.sql.Timestamp import org.apache.spark.sql.{DataFrame, SQLContext} import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig.InputPaths -import fr.polytechnique.cmap.cnam.etl.sources.data.{DcirSource, McoCeSource, McoSource, SsrSource, SsrCeSource, HadSource} -import fr.polytechnique.cmap.cnam.etl.sources.value.{DosagesSource, IrBenSource, 
IrImbSource, IrPhaSource} +import fr.polytechnique.cmap.cnam.etl.sources.data.{DcirSource, HadSource, McoCeSource, McoSource, SsrCeSource, SsrSource} +import fr.polytechnique.cmap.cnam.etl.sources.value.{DosagesSource, IrBenSource, IrImbSource, IrNatSource, IrPhaSource} case class Sources( dcir: Option[DataFrame] = None, @@ -18,6 +18,7 @@ case class Sources( irBen: Option[DataFrame] = None, irImb: Option[DataFrame] = None, irPha: Option[DataFrame] = None, + irNat: Option[DataFrame] = None, dosages: Option[DataFrame] = None) object Sources { @@ -33,6 +34,7 @@ object Sources { irBen = sources.irBen.map(IrBenSource.sanitize), irImb = sources.irImb.map(IrImbSource.sanitize), irPha = sources.irPha.map(IrPhaSource.sanitize), + irNat = sources.irNat.map(IrNatSource.sanitize), dosages = sources.dosages.map(DosagesSource.sanitize) ) } @@ -44,9 +46,11 @@ object Sources { ssr = sources.ssr.map(SsrSource.sanitizeDates(_, studyStart, studyEnd)), had = sources.had.map(HadSource.sanitizeDates(_, studyStart, studyEnd)), mcoCe = sources.mcoCe.map(McoCeSource.sanitizeDates(_, studyStart, studyEnd)), + ssrCe = sources.ssrCe.map(SsrCeSource.sanitizeDates(_, studyStart, studyEnd)), irBen = sources.irBen, irImb = sources.irImb, irPha = sources.irPha, + irNat = sources.irNat, dosages = sources.dosages ) } @@ -59,9 +63,11 @@ object Sources { mcoCePath = paths.mcoCe, hadPath = paths.had, ssrPaths = paths.ssr, + ssrCePath = paths.ssrCe, irBenPath = paths.irBen, irImbPath = paths.irImb, irPhaPath = paths.irPha, + irNatPath = paths.irNat, dosagesPath = paths.dosages ) } @@ -72,22 +78,25 @@ object Sources { mcoPath: Option[String] = None, mcoCePath: Option[String] = None, hadPath: Option[String] = None, - //@todo The merge of ssr_sej and ssr_c should be finally moved to the Flattening project - ssrPaths: Option[List[String]] = None, + ssrPaths: Option[String] = None, + ssrCePath: Option[String] = None, irBenPath: Option[String] = None, irImbPath: Option[String] = None, irPhaPath: 
Option[String] = None, + irNatPath: Option[String] = None, dosagesPath: Option[String] = None): Sources = { Sources( dcir = dcirPath.map(DcirSource.read(sqlContext, _)), mco = mcoPath.map(McoSource.read(sqlContext, _)), mcoCe = mcoCePath.map(McoCeSource.read(sqlContext, _)), - had = hadPath.map(HadSource.read(sqlContext, _)), ssr = ssrPaths.map(SsrSource.read(sqlContext, _)), + ssrCe = ssrCePath.map(SsrCeSource.read(sqlContext, _)), + had = hadPath.map(HadSource.read(sqlContext, _)), irBen = irBenPath.map(IrBenSource.read(sqlContext, _)), irImb = irImbPath.map(IrImbSource.read(sqlContext, _)), irPha = irPhaPath.map(IrPhaSource.read(sqlContext, _)), + irNat = irNatPath.map(IrNatSource.read(sqlContext, _)), dosages = dosagesPath.map(DosagesSource.read(sqlContext, _)) ) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/value/IrNatSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/value/IrNatSource.scala new file mode 100644 index 00000000..97125f62 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/value/IrNatSource.scala @@ -0,0 +1,7 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.sources.value + +import fr.polytechnique.cmap.cnam.etl.sources.SourceManager + +object IrNatSource extends SourceManager From 38660bf1bfff04d6e1206aceb8defa22d074ad36 Mon Sep 17 00:00:00 2001 From: vjarry Date: Fri, 21 Feb 2020 15:53:34 +0100 Subject: [PATCH 14/38] DREES-106: Adding documentation and tests Adding and correcting tests. Correcting test on DcirBiologyActs extractor PractitionnerClaimSpecialitySuite Modification for codecov Removing fromRow, changed NgapActSuite An error was corrected in MedicalActSuite Correction on the NgapActSuite test Correction on the order of the imports. Removed blanked lines unneeded. Taking into account reviews on the PR. Add comments, refacto on camelCase class name syntax. Taking into account reviews on the PR. 
- Adding new Ngap events for Ssr for future utilisation - Adding comments and documentation - Refacto on variables and class for syntax issue Adding Documentation and tests Adding Documentation and tests This is a combination of 3 commits. This is the 1st commit message: Taking into account reviews on the PR. Add comments, refacto on camelCase class name syntax. This is the commit message #2: Taking into account reviews on the PR. - Adding new Ngap events for Ssr for future utilisation - Adding comments and documentation - Refacto on variables and class for syntax issue This is the commit message #3: Adding documentation and tests Adding the definition of an Ngap act. Adding documentation and tests Correcting McoNgapActExtractor test Taking into account the review Better syntax and update on the groupID extraction for the DcirNgapActExtractor. Tests updated. Deleting a file which is not supposed to be in study but in etl/extractors Added coments on the Mco CE Ngap extractor. Adding test for SsrCEMedicalActExtractor Adding documentation and tests Adding the definition of an Ngap act. Adding documentation and tests Correcting McoNgapActExtractor test Taking into account the review Better syntax and update on the groupID extraction for the DcirNgapActExtractor. Tests updated. Deleting a file which is not supposed to be in study but in etl/extractors Added coments on the Mco CE Ngap extractor. 
Adding test for SsrCEMedicalActExtractor Refacto name of class NgapActConfig --- .../cmap/cnam/etl/events/NgapAct.scala | 53 +- .../events/PractionnerClaimSpeciality.scala | 5 + .../etl/extractors/dcir/DcirExtractor.scala | 13 +- .../etl/extractors/mco/McoExtractor.scala | 5 +- .../etl/extractors/mcoCe/McoCeExtractor.scala | 8 +- .../ngapacts/DcirNgapActExtractor.scala | 84 +- .../ngapacts/McoCeNgapActExtractor.scala | 56 +- .../extractors/ngapacts/NgapActConfig.scala | 57 +- ...ractitionnerClaimSpecialityExtractor.scala | 20 - src/test/resources/PMSI/T_MCOaa_nnFBSTC.csv | 4 + src/test/resources/PMSI/T_MCOaa_nnFCSTC.csv | 4 + src/test/resources/test-input/DCIR.parquet | Bin 43993 -> 36655 bytes .../resources/test-input/DCIR_w_BIO.parquet | Bin 43988 -> 43993 bytes src/test/resources/test-input/MCO_CE.parquet | Bin 18435 -> 34106 bytes src/test/resources/test-input/SSR.parquet | Bin 0 -> 26038 bytes src/test/resources/value_tables/IR_NAT_V.csv | 1256 +++++++++++++++++ .../resources/value_tables/IR_NAT_V.parquet | Bin 0 -> 42011 bytes .../cnam/etl/events/MedicalActSuite.scala | 2 +- .../cmap/cnam/etl/events/NgapActSuite.scala | 27 + .../PractionnerClaimSpecialitySuite.scala | 6 +- .../acts/DcirBiologyActsSuite.scala | 2 +- .../acts/McoCEMedicalActsSuite.scala | 44 +- .../acts/SsrCEMedicalActsSuite.scala | 83 ++ .../ngapacts/DcirNgapActsExtractorSuite.scala | 92 ++ .../ngapacts/McoNgapActsExtractorSuite.scala | 118 ++ ...itionerClaimSpecialityExtractorSuite.scala | 105 +- 26 files changed, 1878 insertions(+), 166 deletions(-) delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/dreesChronic/extractors/PractitionnerClaimSpecialityExtractor.scala create mode 100644 src/test/resources/PMSI/T_MCOaa_nnFBSTC.csv create mode 100644 src/test/resources/PMSI/T_MCOaa_nnFCSTC.csv create mode 100644 src/test/resources/test-input/SSR.parquet create mode 100644 src/test/resources/value_tables/IR_NAT_V.csv create mode 100644 
src/test/resources/value_tables/IR_NAT_V.parquet create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/events/NgapActSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/NgapAct.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/NgapAct.scala index daa40ca1..160aad0e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/NgapAct.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/NgapAct.scala @@ -1,35 +1,52 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row +/** The NGAP is one of the two different nomenclatures used in the SNDS to facture healthcare acts. + * + * It concerns mainly nurses, physiotherapist masseurs and medical auxiliaries and + * some acts of dental surgeons as well as clinical acts of doctors. It has to be distinguished from the CCAM, + * which groups together the technical acts performed by doctors (much more precise). 
+ * A updated date version can be found on the website of the CNAM : + * https://www.ameli.fr/ain/masseur-kinesitherapeute/exercice-liberal/facturation-remuneration/nomenclatures-ngap-et-lpp/nomenclatures-ngap-lpp) + * + */ trait NgapAct extends AnyEvent with EventBuilder { override val category: EventCategory[NgapAct] = "ngap_act" + def apply(patientID: String, groupID: String, ngapCoefficient: String, weight: Double, date: Timestamp): Event[NgapAct] = { + Event(patientID, category, groupID, ngapCoefficient, weight, date, None) + } + def apply(patientID: String, groupID: String, ngapCoefficient: String, date: Timestamp): Event[NgapAct] = { Event(patientID, category, groupID, ngapCoefficient, 0.0, date, None) } - - def fromRow( - r: Row, - patientIDCol: String = "patientID", - pfsIDCol: String = "groupID", - ngapCoefficient: String = "code", - dateCol: String = "eventDate"): Event[NgapAct] = - apply( - r.getAs[String](patientIDCol), - r.getAs[String](pfsIDCol), - r.getAs[String](ngapCoefficient), - r.getAs[Timestamp](dateCol) - ) } + object DcirNgapAct extends NgapAct { override val category: EventCategory[NgapAct] = "dcir_ngap_act" + + object groupID { + val PrivateAmbulatory = "private_ambulatory" + val PublicAmbulatory = "public_ambulatory" + val PrivateHospital = "private_hospital" + val Liberal = "liberal" + val DcirNgapAct = "dcir_ngap_act" + val Unknown = "unknown_source" + } + } +/** + * Tables of hospital services (FBSTC) and procedures (FCSTC) are not completed for each stay and are complementary. 
+ * All the details are in the collaborative documentation on the SNDS here : + * https://documentation-snds.health-data-hub.fr/fiches/actes_consult_externes.html#reperage-des-ace-dans-la-table-des-prestations-dcir + */ object McoCeFbstcNgapAct extends NgapAct { override val category: EventCategory[NgapAct] = "mco_ce_fbstc_act" } @@ -37,3 +54,11 @@ object McoCeFbstcNgapAct extends NgapAct { object McoCeFcstcNgapAct extends NgapAct { override val category: EventCategory[NgapAct] = "mco_ce_fcstc_act" } + +object SsrCeFbstcNgapAct extends NgapAct { + override val category: EventCategory[NgapAct] = "ssr_ce_fbstc_act" +} + +object SsrCeFcstcNgapAct extends NgapAct { + override val category: EventCategory[NgapAct] = "ssr_ce_fcstc_act" +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala index 1530d83e..63ed1342 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala @@ -22,6 +22,11 @@ object NonMedicalPractitionerClaim extends PractitionerClaimSpeciality { override val category: EventCategory[PractitionerClaimSpeciality] = "non_medical_practitioner_claim" } +/** + * Tables of hospital services (FBSTC) and procedures (FCSTC) are not completed for each stay and are complementary. 
+ * All the details are in the collaborative documentation on the SNDS here : + * https://documentation-snds.health-data-hub.fr/fiches/actes_consult_externes.html#reperage-des-ace-dans-la-table-des-prestations-dcir + */ object McoCeFbstcMedicalPractitionerClaim extends PractitionerClaimSpeciality { override val category: EventCategory[PractitionerClaimSpeciality] = "mco_ce__fbstc_practitioner_claim" } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala index f67e45eb..96580f4d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala @@ -3,8 +3,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.dcir import java.sql.Timestamp -import org.apache.commons.codec.binary.Base64 import scala.util.Try +import org.apache.commons.codec.binary.Base64 import org.apache.spark.sql.functions.col import org.apache.spark.sql.{DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} @@ -41,11 +41,12 @@ trait DcirExtractor[EventType <: AnyEvent] extends Extractor[EventType] with Dci r.getAs[String](ColNames.PatientID) } - /** Trying to catch unknown dates (example : IJ = Indemnité Journalière) - * - * @param r The Row object itself - * @return The date of the event or the flux date if it doesn't exist - */ + /** Trying to catch unknown dates + * example of unknown dates situation : IJ = Indemnité Journalière which are a replacement income + * paid by the HealthCare Insurance during a sick leave. 
+ * @param r The Row object itself + * @return The date of the event or the flux date if it doesn't exist + */ def extractStart(r: Row): Timestamp = { Try(r.getAs[java.util.Date](ColNames.Date).toTimestamp) recover { case _: NullPointerException => extractFluxDate(r) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala index 54270321..8c6f7ca0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala @@ -3,12 +3,11 @@ package fr.polytechnique.cmap.cnam.etl.extractors.mco import java.sql.Timestamp - +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.{DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} trait McoExtractor[EventType <: AnyEvent] extends Extractor[EventType] with McoSource with EventRowExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala index 9e8c4652..38dafe36 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala @@ -38,9 +38,15 @@ trait McoCeExtractor[EventType <: AnyEvent] extends Extractor[EventType] with Mc r.getAs[String](ColNames.PatientID) } + /** Return groupID as hospital stay ID + * + * @param r + * @return groupId which is the unique ID of the hospital stay + */ override def extractGroupId(r: Row): String = { r.getAs[String](ColNames.EtaNum) + "_" + - 
r.getAs[String](ColNames.SeqNum) + "_" + r.getAs[Int](ColNames.Year).toString + r.getAs[String](ColNames.SeqNum) + "_" + + r.getAs[Int](ColNames.Year).toString } def extractStart(r: Row): Timestamp = r.getAs[Timestamp](ColNames.Date).toTimestamp diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala index 4460e4dc..a13a0412 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala @@ -1,35 +1,32 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts -import java.sql.Timestamp - import scala.reflect.runtime.universe._ -import scala.util.Try import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} +import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.{DcirNgapAct, Event, EventBuilder, NgapAct} import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -import org.apache.spark.sql.functions.col class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[NgapAct] { + private final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) + override val columnName: String = ColNames.NaturePrestation val ngapKeyLetter: String = "PRS_NAT_CB2" - override val eventBuilder: EventBuilder = DcirNgapAct - override def getInput(sources: Sources): DataFrame = { + val neededColumns: List[Column] = List( ColNames.PatientID, ColNames.NaturePrestation, ColNames.NgapCoefficient, - ColNames.Date, ColNames.ExecPSNum, ColNames.DcirFluxDate, ngapKeyLetter + ColNames.Date, ColNames.ExecPSNum, ColNames.DcirFluxDate, ngapKeyLetter, + ColNames.Sector, ColNames.GHSCode, ColNames.InstitutionCode ).map(colName => col(colName)) - lazy val irNat = sources.irNat.get lazy val dcir = 
sources.dcir.get lazy val df: DataFrame = dcir.join(irNat, dcir("PRS_NAT_REF").cast("String") === irNat("PRS_NAT")) - df.select(neededColumns: _*) } @@ -38,28 +35,59 @@ class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[ } override def isInStudy(codes: Set[String])(row: Row): Boolean = { - ngapActsConfig.dcirIsInCategory( - ngapActsConfig.acts_categories, + dcirIsInCategory( + ngapActsConfig.actsCategories, row ) } + /** + * We extract Ngap acts as a concatenation of three different ways to identify specific ngap acts in the SNDS : + * - prestation type (ngapPrsNatRefs: PRS_NAT_REF), + * - prestation coefficient (ngapKeyLetters : PRS_NAT_CB2 or ACT_COD in the PMSI_CE), + * - prestation coefficient (ngapCoefficients: PRS_ACT_CFT or ACT_COE in the PMSI_CE) + * + * For more information, Cf NgapActConfig documentation. + * + * @return concatenation of the three codes + */ override def code: Row => String = (row: Row) => { row.getAs[Int](ColNames.NaturePrestation).toString + "_" + row.getAs[String](ngapKeyLetter) + "_" + row.getAs[Double](ColNames.NgapCoefficient).toString } - override def extractStart(r: Row): Timestamp = { - Try(super.extractStart(r)) recover { - case _: NullPointerException => extractFluxDate(r) - } - }.get - override def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.ExecPSNum) + + if (!r.isNullAt(r.fieldIndex(ColNames.Sector)) && getSector(r) == 1) { + DcirNgapAct.groupID.PublicAmbulatory + } + else { + if (r.isNullAt(r.fieldIndex(ColNames.GHSCode))) { + DcirNgapAct.groupID.Liberal + } else { + // Value is not at null, it is not liberal + lazy val ghs = getGHS(r) + lazy val institutionCode = getInstitutionCode(r) + // Check if it is a private ambulatory + if (ghs == 0 && PrivateInstitutionCodes.contains(institutionCode)) { + DcirNgapAct.groupID.PrivateAmbulatory + } + else { + DcirNgapAct.groupID.Unknown + } + } + } } + def getGHS(r: Row): Double = r.getAs[Double](ColNames.GHSCode) + + def 
getInstitutionCode(r: Row): Double = r.getAs[Double](ColNames.InstitutionCode) + + def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) + + override def extractWeight(r: Row): Double = 1.0 + override def extract( sources: Sources, codes: Set[String]) @@ -70,7 +98,7 @@ class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[ import input.sqlContext.implicits._ { - if (ngapActsConfig.acts_categories.isEmpty) { + if (ngapActsConfig.actsCategories.isEmpty) { input.filter(isInExtractorScope _) } else { @@ -78,4 +106,22 @@ class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[ } }.flatMap(builder _).distinct() } + + def dcirIsInCategory( + categories: List[NgapActClassConfig], + row: Row): Boolean = { + + val ngapKeyLetter : String = row.getAs[String]("PRS_NAT_CB2") + val ngapCoefficient : String = row.getAs[Double]("PRS_ACT_CFT").toString + val prsNatRef: String = row.getAs[Int]("PRS_NAT_REF").toString + + categories + .exists(category => + ( + category.ngapKeyLetters.contains(ngapKeyLetter) && + category.ngapCoefficients.contains(ngapCoefficient) + ) || + category.ngapPrsNatRefs.contains(prsNatRef) + ) + } } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala index dbdf8680..9c86488f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala @@ -1,12 +1,11 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts +import scala.reflect.runtime.universe._ +import scala.util.Try +import org.apache.spark.sql.{DataFrame, Dataset, Row} import fr.polytechnique.cmap.cnam.etl.events.{Event, EventBuilder, McoCeFbstcNgapAct, McoCeFcstcNgapAct, NgapAct} import 
fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -import org.apache.spark.sql.{DataFrame, Dataset, Row} - -import scala.reflect.runtime.universe._ -import scala.util.Try trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { val ngapActsConfig: NgapActConfig @@ -16,8 +15,8 @@ trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { val columnName: String = keyLetterColumn override def isInStudy(codes: Set[String])(row: Row): Boolean = { - ngapActsConfig.pmsiIsInCategories( - ngapActsConfig.acts_categories, + pmsiIsInCategories( + ngapActsConfig.actsCategories, keyLetterColumn, coeffColumn, row @@ -31,12 +30,6 @@ trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { "PmsiCe_" + row.getAs[String](keyLetterColumn) + "_" + coeff.get } - override def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.EtaNum) + "_" + - r.getAs[String](ColNames.SeqNum) + "_" + - r.getAs[Int](ColNames.Year).toString - } - override def extract( sources: Sources, codes: Set[String]) @@ -47,7 +40,7 @@ trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { import input.sqlContext.implicits._ { - if (ngapActsConfig.acts_categories.isEmpty) { + if (ngapActsConfig.actsCategories.isEmpty) { input.filter(isInExtractorScope _) } else { @@ -55,6 +48,43 @@ trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { } }.flatMap(builder _).distinct() } + + def pmsiIsInCategory( + category: NgapActClassConfig, + ngapLetter: String, + ngapCoeff: String): Boolean = { + if (category.ngapCoefficients.isEmpty) { + category.ngapKeyLetters.contains(ngapLetter) + } + else { + category.ngapCoefficients.contains(ngapCoeff) && + category.ngapKeyLetters.contains(ngapLetter) + } + } + + /** User could be interested by different Ngap categories each defined by a list of key letters + * and a list of coefficients. This function iterates over each category. More détails in the NgapActConfig class. 
+ * + * @param categories : A list of Ngap prestation and coefficient codes + * @param ngapKeyColumn : the Ngap prestation code for MCO CE + * @param ngapCoeffColumn : the Ngap coefficient which complete the prestation code for MCO CE + * @param row + * @return + */ + def pmsiIsInCategories( + categories: List[NgapActClassConfig], + ngapKeyColumn: String, + ngapCoeffColumn: String, + row: Row): Boolean = { + + val letter = row.getAs[String](ngapKeyColumn) + val coeff = Try(row.getAs[Double](ngapCoeffColumn).toString) recover { + case _: NullPointerException => "0" + } + + categories + .exists(category => pmsiIsInCategory(category, letter, coeff.get)) + } } class McoCeFbstcNgapActExtractor(ngapConfig: NgapActConfig) extends McoCeNgapActExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala index 5023d2fe..6df9922c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala @@ -1,10 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts -import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig - import scala.util.Try import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig /** * NgapActConfig defines three different ways to filter for specific ngap acts in the SNDS : @@ -23,59 +21,14 @@ import org.apache.spark.sql.Row * search where ngapCoefficient is available * - if a list of ngapKeyLetters and a list of ngapCoefficients is given, it extracts all combination of (keyLetter, coefficient) * - if the list of ngapCoefficients is empty, extract all acts where coeff is in ngapCoefficient - * @param acts_categories List of configuration to get specific NgapActs + * @param actsCategories List of configuration to 
get specific NgapActs */ class NgapActConfig( - val acts_categories: List[NgapActClassConfig]) extends ExtractorConfig with Serializable { - - def dcirIsInCategory( - categories: List[NgapActClassConfig], - row: Row): Boolean = { - - val ngapKeyLetter : String = row.getAs[String]("PRS_NAT_CB2") - val ngapCoefficient : String = row.getAs[Double]("PRS_ACT_CFT").toString - val prsNatRef: String = row.getAs[Int]("PRS_NAT_REF").toString - - categories - .exists(category => - (category.ngapKeyLetters.contains(ngapKeyLetter) && - category.ngapCoefficients.contains(ngapCoefficient)) || - category.ngapPrsNatRefs.contains(prsNatRef) - ) - } - - def pmsiIsInCategories( - categories: List[NgapActClassConfig], - ngapKeyColumn: String, - ngapCoeffColumn: String, - row: Row): Boolean = { - - val letter = row.getAs[String](ngapKeyColumn) - val coeff = Try(row.getAs[Double](ngapCoeffColumn).toString) recover { - case _: NullPointerException => "0" - } - - categories - .exists(category => pmsiIsInCategory(category, letter, coeff.get)) - } - - def pmsiIsInCategory( - category: NgapActClassConfig, - ngapLetter: String, - ngapCoeff: String): Boolean = { - if (category.ngapCoefficients.isEmpty) { - category.ngapKeyLetters.contains(ngapLetter) - } - else { - category.ngapCoefficients.contains(ngapCoeff) && - category.ngapKeyLetters.contains(ngapLetter) - } - } + val actsCategories: List[NgapActClassConfig]) extends ExtractorConfig with Serializable { } object NgapActConfig { - def apply(acts_categories: List[NgapActClassConfig]): NgapActConfig= new NgapActConfig( - acts_categories + def apply(actsCategories: List[NgapActClassConfig]): NgapActConfig= new NgapActConfig( + actsCategories ) - } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/dreesChronic/extractors/PractitionnerClaimSpecialityExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/dreesChronic/extractors/PractitionnerClaimSpecialityExtractor.scala deleted file mode 100644 
index 7e0aeac3..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/dreesChronic/extractors/PractitionnerClaimSpecialityExtractor.scala +++ /dev/null @@ -1,20 +0,0 @@ -package fr.polytechnique.cmap.cnam.study.dreesChronic.extractors - - -import fr.polytechnique.cmap.cnam.etl.events.{PractitionerClaimSpeciality, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.prestations._ -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.functions.unionDatasets -import org.apache.spark.sql.Dataset - -class PractitionnerClaimSpecialityExtractor(config: PractitionerClaimSpecialityConfig) { - - def extract(sources: Sources): Dataset[Event[PractitionerClaimSpeciality]] = { - - val nonMedicalSpeciality = NonMedicalPractitionerClaimExtractor.extract(sources, config.nonMedicalSpeCodes.toSet) - val medicalSpeciality = MedicalPractitionerClaimExtractor.extract(sources, config.medicalSpeCodes.toSet) - - unionDatasets(nonMedicalSpeciality, medicalSpeciality) - - } -} diff --git a/src/test/resources/PMSI/T_MCOaa_nnFBSTC.csv b/src/test/resources/PMSI/T_MCOaa_nnFBSTC.csv new file mode 100644 index 00000000..04cc8e84 --- /dev/null +++ b/src/test/resources/PMSI/T_MCOaa_nnFBSTC.csv @@ -0,0 +1,4 @@ +ACT_COD,ACT_COE,ACT_DNB,ACT_NBR,AMC_MNR,AMO_MNR,COEF_MCO,DEL_DAT_ENT,ETA_NUM,ETA_NUM_GEO,EXE_SPE,EXO_TM,HON_MNT,NUM_FAC,PRI_UNI,PSH_DMT,PSH_MDT,REM_BAS,REM_TAU,RSF_TYP,SEQ_NUM,SOR_ANN,SOR_MOI,TYP_ART,TYP_FPI +ABC,1.0,47,13,7083244.63,74666912.92,90593.7962,87701,390780146,UKObWJm,1,JrmsfEVr,12015701.82,yKWJO,6751465.02,dftrehpdSXgmPYxKLw,HtmEEPoJZNUYjK,52232224.74,530,ogD,00064268,2014,odz,I,FcpMYDQXuLXXicz +ABG,42.0,54,604,9248747.63,76316698.24,12890.6905,27754,190000059,LZqDvhEnofoP,,BNBN,74937208.48,ededzd,7342024.15,MlFnnCA,CcxCwN,96066675.71,867,mpN,00022621,2014,qVsIPuQ,X,wEcWCNecLDJcashSJO 
+ACO,,100,92,4013746.75,60935884.85,39460.6247,38073,390780146,OJqUNrNnJtHLiRXG,22,cuwUELVG,10034714.74,qEAWmFaaUVWUKgW,2651112.13,RTtAdKMS,nON,81328743.81,397,WXYr,00114237,2014,XW,H,Gavod diff --git a/src/test/resources/PMSI/T_MCOaa_nnFCSTC.csv b/src/test/resources/PMSI/T_MCOaa_nnFCSTC.csv new file mode 100644 index 00000000..76b77f5c --- /dev/null +++ b/src/test/resources/PMSI/T_MCOaa_nnFCSTC.csv @@ -0,0 +1,4 @@ +ACT_COD,ACT_COE,ACT_DNB,ACT_NBR,AMC_MNR,AMO_MNR,COEF_MCO,CONSULT_MIG,DEL_DAT_ENT,ETA_NUM,ETA_NUM_GEO,EXE_SPE,EXO_TM,HON_MNT,NUM_FAC,PRI_UNI,PSH_DMT,PSH_MDT,REM_BAS,REM_TAU,RSF_TYP,SEQ_NUM,SOR_ANN,SOR_MOI,TYP_ART,DAT_RET,NIR_ANO_17,NIAS_RET,SEX_RET,NOE_MNR,NOE_OPE,NIR_RET +ADE,802770.97,95,754,909104.28,19056776.78,30247.1354,zwJmWpuZRvEbySDeO,14775,390780146,AoEhdAjLtQjD,1,y,31336037.18,wHtQPioDLBPGAsG,3261939.8,PZVV,SgTeuUTsBAPP,9674004.7,294,eDLOvjVzKzLFh,00114237,KMMzOCcn,BXIyjqQw,X,0,uJNf,0,0,44091236,435,0 +ADC,420416.2,28,124,8879822.76,68200463.67,42206.718,BGvBlvvFSRkVp,8328,710780214,EPngKCorUZHRrtL,25,ypirRYVD,14755336.76,TsMNRhvuyBzmgsf,6242542.15,XkraU,OfEarDFRf,38840813.17,528,iVsWLTHURmVBRHHGPYR,00000130,Hfc,x,H,0,sUzrFNwAIWJBq,0,0,6701917,399,0 +A F,126936.43,4,19,5342621.25,41563748.97,9526.5076,wuZAhfcFNbJgrACI,84179,390780146,TMCbZywqMYisk,13,HCIF,59253505.42,efefz,1203645.95,qDjlCyzADWhx,oojjbj,56565841.8,371,tjyoD,00026744,eyNxA,KryZYbNDd,C,0,rTNotODgUKNtpBBhfEm,0,0,28198642,955,0 diff --git a/src/test/resources/test-input/DCIR.parquet b/src/test/resources/test-input/DCIR.parquet index bd269990a2f09b3742cea835fd8bfc36be663a73..07db202365c8fe1f5018539b3bd675e6a2c6cbc2 100644 GIT binary patch literal 36655 zcmdU14UAORb)L67h5%c9)+;n`VcZ*=6z0?l3!xF|jjVU~wE{ zAh6i@H?Hdt>QYQo>Nu3T)IyeJxyZFFD~%9EG_qV2MTknovaAZbkwsn0t*q9vtjOs( z=broX?t3#Z^CpqH((Jyy_nhyZbMCq4-goc0Q_7Yz$yhR2TbTku31{BHjN7R-dtR`^_QkHzNP9E%|-evGe+vErTq^lRQz@)*<48*q%~~XsYHvT_T@PxJh1-OM6VP&QG+)Scb@u=e7|j4m z$QoBY5n!TI0LIz@%@hyyz$(-fEekS-h$n20G`4sH=m2KNArS%wO@^iQO-6k`cX%2Q 
z_H=Cm1@S4J+0@;$JODRMP`J}XeMm==nyrOQBs+$a)DQ6v_6xuL$uAA>H0&Nrb|mLu zSBb&4Vm9bDZ7y|-ZNnR(>KpJ0C>_UAH;(lo3Ije%CvZnb0KhL_+sKFupApJN`(^pU}6dR+De`GtE7GQUg@ImaZFl-)J%ojAN-A$NCTnmL!+) zY>_PC7)qym@K0}VFaF77FsS*bjdrMW+kuYI^rX?h1Y%DBt}820)AVO0Mh*)u4P5|m zqBAWxbA+QS0k?ky(d=l1F=K@L&4Z4_2!21OiN`>&=MGQK9q~kLhO-ngM3f+Dtce2$ zvWxC!N9O(>HF5CNd@GT~V%Ng#9WY89OAR1xmcwXe)5%W`zVqPRlZ*OK?E2=+UvC-i z2Q0sW30dPNBy9>cU_~Et+WRcXAO7e1pWSfH!v4j#KXvwh7u^y9DQnz>q`d;<(Vyba ztp2qq>|izoQr5T$NgD!bPi<)FfWqt(SZ6srgcjfIh$LTvPY?o24f|`S2!og&3W24e z5O;=1fsPZD!g41qMHmG3X%GTSL!}5) zFuRAwOk7rjoPTJ>V9$q#11znfZ4hEG)aFBjEOUrp%tzv& zTL_5IIU$x@m|SG`X&TqI%U7&)6Y`1r^!hyYX#$;8R@B$;3d&PdOW?A4P9G|cz%$^3 zKLG5XHsV-g7+FWi{yz#CUrPjrKQhL#)WFc=OTE0%>N3%J+qZwUXZ0)+!$1EdWR0ty zQfO74hwh}+oV^m@*gn;|a$jT}J=f@!U72G9s?bID;%jpiT zHPI^O97>bYJ8yjp4IF#t<^BK&x+2xM2}x^BE$A`nWEZcc-1yO-I!^vy1Y*Z?XNdo) ze?(-z9}8rtam5qRhOMj2QH@^vU%&g^?*dY?KVw~pbS;3OcQ_iMbBA$+(rZZupJ<_v z#`kNdjc<)l)B0o^`1P|Jet6Gk|Fk3<&?_Np*hWBW>31jaTy!G2816aj-~M4iW?AtXws8hVGn1=S(P!zbEfuq&Ma5s zITIXm{uvIJsLR08fewjdsT;?7t0Qn2m>W>&8~B6424MW&Q9Fjm7F#1!u3Y6>HXu^i zDwi7eZAh7-p$njRXyBk|SZmC&+I?vOZjL1&lo(c2M&z1N`G({I4i?w0LNT)VYXpC- z2}t9NxK5TVZOLKk1DvD}&IJL4IF_<3_{ca9x3w=s;<{tUmQ;`W;M<(DTt-_hY5(H- zEnJ~_bF^Tu0GKseL|@@=vXO88G%bY2#Wf551TAE&l>HVu0fQbrmq(qkBNKe}cWUfx zm=1%2;tz;mIgC&Q>q3T=4KcG01lC;v0$aiM8j>QGT$)T9YeU{(L#!0;4yG!!CftXX zZpTvo0RzH%pSQzi_n>%k$+!x)JNz9wQ`fo1bFgz;*eNJz!{+dZOJqmQYFQNPTHh%+ zHJMHhjl_!Z;k|Y%;)=a?q#;T4APs~yq{MCvY9lbP?kcniHwc!7Mn<*prd_~9r*x#C z6zEGy?8o=S_Tj-{xgJRku%{V_ltnAAHuX(iefW+D6d0$y!0=?um^cz@|l2j4rUiE=m* zAxi2zcG3r&_8Y*)=EU~FN*ZZ?8^p2HrQZ}?d1BkAkB!L1#Sx89pcU!ho;eBxI8hvH+&I?JCLKG^qp+H|25{=ys&UbF0i-$kNzp(oYyK2s zVLctGuy(tgcGzI&r_ z4$>?DZ~35D(S}UabHK9Z8~FW84*vQJV8H7q4$v4@ZF8g8u~frUp z{x~vGH8AqY(iI!{!+Un!Ui$x5X_ud`2<0j~i=FQCOI+@giea!1t?0Jq;UQN%g$>m}9-IWg{+I9UD1s#aRF(!y4VAC$zGMj)RG7g$H!D7(NI*yP_Mq^=! 
zOne;SSgXoV<)AfQvBPv09DiZ0u2`8~xhkDm;U?r0F`BK)3J^K0)${ksQ4deSR-q6W z)5#vA|7VL0botx0{wEOkcHs?}z#<97-x_i}{|{0QcnZn^g*6ysG06x|K}IO7$#@m^ zwgzI(b#l~(8BlZ3KwYN`vwLh*i9H%7_GnG9UB&2zF}gJwug1SZMeS5Ac80asS-VBW zSi5T?jH9-Bzx{h>ZO5Kn6T9!J?%DF)$$KY~Z??sfZ8x>Kv+}u0b+nwXj*J(%WMFt> zH9s^~%~$d_br@xZV#T7u65zFHDz}x4l1jNEkvg!w(ehw5H(WrZn_#Gt45jjzB}2JT ztO8CoI{-;>%(texW=!#M*|1qgSlL{zS{W)-hekjyqFPfJsb+Iy)yil|wMb>fY!$+h zL6yr7RY!)(NToP)#qnw`TM3kb9)U8ug*&oK09FPR4$A`3E5h;h-LW_>7+QzBZJk!g7g8z8Y^rR zSb_#@Kd^4C&_|VE&y8d&vE)qbTkId~IrwXp6h~~DQh8j;&0J%HBV3Xz4P(z6ty9U! zs8!;Z2Wg7gb!cA$$1fq$_$8DjT0&W(C6pyf!s+=E*m_v{W*&SRKPnu;-BK;aP=^g=(&{u?m(~y)4U017i}G=!U_+$#V(2uyM3ntYGpe z_QBA)a32$eOZMvc3Y|BV+)a9K@zk;j0QeUno_nzZ4b5E09E4 z3DHF#fiEb&0*uFUHfOXqh;$;(DXe0J;+zi)`VO=N85a%mB^4PFsH8-G?rRNg3Q`zWfVQi*7~X=MoZo+8f(2gm|K7rk4kchCKM+AkH^(9L_2aj#jf{pK zy^AHqp^<7O3o%QVjOImus}h>Ed^DA{1;IsjRdZkARa;0pwl%uH~ zJHo2X7jiXGv2OhsbZH1G))~{eg*udnflwL_<%FOQEm9Ci<6E3+sfEGXszY<-GTp+J zL+YT_(7Lh0`f7P(LwMcz8ZkLoI)=C+wwTKnu|Mz+1#lhILu#qFB(BLhhD#1|zmVEo zb`4G0XZgD812c*SW9O|l5*IlQsr*>VjPJ6{V z?G@{^SLF6WEQg+DOtZFzrSY1Qj+Bd{PI{zaHRX{ZEuu$)*VX8&_2b!UUMyEadZS!0 z!lNatT7reNiFLKL92@7UKSUFJ)qsGaMT8IF+a1Z4y00=Do%M@BeI0x6#1gPVnfr<;PN*1&T;Vb5F3CznQpRBN8 zfI85{>?;Em7cc{3>##NiS52!Sb+~{aEwRq9TQSet+vNjBxgjI^QVD!?tN;rz+3zf& zzlTz@=XK8e?MsfA=uEcFTOOO1nCE*>O}3?Gr}jIkCxN0baM8~Du|sTeVl!T5iS2-s zI_v-m@L3n=yyx01K7QdPzDJ=!pa8aOV5`QsmE^t99>NBLO0&Jt4s7Dw9d~zL z4`BmaYqpbdV6zTJyjQQW*!caL*p51>=NupbKI;;kcX3*0vSZ$IVH9QV@gngZaZ<+| zAnDV)a{5V#`_XixUF=E@8%DbyO;^@YrxxCk83rG%kHlNOR`bXVV6zT8yrVOH;;L7F zVtdv}J?{WXpBr{@%{w#8$E&so4Bj)dYVf|-VTfyNB@pja>S6D_4%I2xHNrUWevVoc zj*5#i-fkeS6{*$UyPxZ$J8Iss*#9Zf(@yGj2devwyJEq6W8V8; z(ER4+jZ*FXFZjCSvTOMXZ|rE*F1sM6bvexYaGuo+@BR?m(@yGH2T1x{|0OW*;Fm(U zV1LNqI`}2v^4<0FUhK3)%DQK#fPB_=0f5VbHN)o0-tqb%X@QvXL6Ws)2+R`2-t)RY27X_w%ANppw=!g1~%)yl=tBh>Io6P z0*J7Yu^CEiIRBmhh79p(6I^VOYVar1 z0KSF5^$Ox}#$u7fm-_5*MzN?2P4DS0Lmp$Zm0G$l^&k*{-DIAseiD6lxZC8`+pZqbpS;&r8nc11cTD`a)_83tOsd9>C-3AcLrC6P z)BT4$IiyK6Tk4t?xag#k~jb!_^i7Zc4$XqJi+? 
zYD9ocF#JITeq@9f`iy=?Zr<7^1oYY$`naFp6_jGhgOt4CO_aEpdbbZ|Z*^74yYx*X zdSjTOy_5kn`d|7cbOD+IZ!B5&lDz%j3gaIYi?NrT)N2lqNdDmMBkxqd?vPoseb{Ix zdC7B5^()U&7mmEM*$_6^jn`~vvmtD6tTB56wB;>(ViOMc#u}e}-p*P2@rFKeh}re+ z9Ozf$=8*Sz!Pj%JSt_>wi649J@dEeUDWHT`NZE7hP(Z76s-RTD-VpQP*#S#HJ~=Q{ zIy(RY*0?X^T^zKU<#Pt&$Fe{2J#NAv)aYi ze%M*p>KZLRLv=Ywujq zYL^d8jCRizRrlyA;jNJ{zKZeg{MHDtS$XQ-rBTbiC2{sb%{@;0371B}QPq7N@BNb9 z5Db3k6+^)HOVE-L-tf`;biL)Ee11cG=%AmjX9s>p=Dd8n&z$ewVJR3e=X-aADEPs5Y&P4R zAACo#X>)#5wb{ghoi^u3RmG;w`N^G{O`hrzi}0&Y?gSpOrKWifY}Pz*(x=Y;bW;a5 zD>fZX4sG$VsRKgIc4!N*+0o?4R*Q{K5jES9t-xk);2oW`*!ZYXvmKoTHe<}7g5L3M zMyNH;BZ*Cf+T+{gk|20N#C!cNn_rw>5XsZR?v&4@P{fQkMbx#XQy+6jMwnQm<>huoSeuTv++&obiIw&&! zW~bpCxIKluaJoyp7>9qe6MVzESmIsUW#}f)aN!)*Xz|i6(V~5Y#M{5yXi@UQc`fmZ zv9^CVv}xTL@t)oj+NM}(2rZx9Q=`pOQ=x4dtCUkys!4Sp#5=p!Xi^?QOErb4XZIRS z8dpHP3-=jKN?xfZO(7TVgC?zeAKshy`fi`Kmk@6!h~7oC->uK)A_Ah#}ZcppEghZUZ+1kclnU%WGX{2(}Zjhh?ZOAqUY zLy+L~-EfJQ9tK(L$ad-xG8PRH3xUt*2>}PosYiIQd<;CW&lbkRC-;iyW6AUKqaaw%lV#wY!X=Y7d@;s-S?!sq|QgI#&z2Vf73)8_Wp@yFmGsz#%n zB%m4vT9))J7_pg2)PAdLEwTBatx?pF-(CP+@i)65pVyW z)D{P^z!HQ-;s7y-V_gyP4n83u!a9so7nB`wTMgz(t|sOyCPf7>sxln%xLIwwkyG{dQq2eh!>Qg=$clRpAp)5$Gfb3+ekK??Mw zkb5TH@dI_F5J?wG3ZV5#@!xfLIVD5&0?=|Vy!fZJTjW#?#Ras?^_x0eoWh~FfR?$Q zeX_QlIJHA@0WEXAS(l5FFccTiGS`p)tX^A`o}svambrdemx~fN6c^Ak*ZzZb+7c-w ziVJ9&>r7oPzg|GgT%XqE^6Le(%=PM@*OSn%7tk`-hsb5dH^Z+vRfY7zF#)vTjp}}m zcluDB7DXb9vI)>~i=Wit;uIIf1+>id%2Uvm%p<@!V904k)D5Xk9?)V0gV85`iyh&E zhihwU2EkpaDGzA-)QunYAaEZ0i`qDxOrzunT9*Ik$WqfK1+ zY6`Su`wD3NQ6KLD zd9NM`m)4H@cnkov;W3|(PX3jlfsy#a2}8$7ZDQ5 z1GIo7ZzAa@fpqjIhCJxU3Q46RDVcy)$`ri0NmZWlPBpv`Y5Cohk3Pb&xp0OCbTR zLJC}>@;-dlkjWSooDQZ?fKH(VN5!STt$`wv#}o?CDU`tVD(}K^1H~8-7?U~GOd$cS zLJC}k^M3JnfMmwFU}72};V1=KKnmGt!111X83xVl*t|}6@y^|MEt}Xi0r#zz(bcPE zd%nMY8Qj78{@%%{WxIFYzwF-0sfn!u{_Y)9lXp$-hJqb?w{M@=yd5Y3VshyMiN0Xe9>mDQ(;y971-H{UFHfU1st_g)b*Y2LbPm4gv>J5kbU#Ne$AJWjSk5qcme#P9=;6!pVirgFrq5Ui)kw z0R5HkiADqA{6h`WRM&7iqDE<^YxwqRL=bV^Rf9C84<|21LD9L45m9jNfTd(a6r4LC 
zyG^5P-MJSR6<0ll$z(}La*GZETVv5dAnj3uG~CTb7GzWukrWvbg>xh$qHxM&M3m?* zXCzc=SsHaY2@=WWBuFHelOWMtjxU#MgEZ{4Mh>P%X*AaI4R#IClo-)Ml99Zrr5m7iZ zRiiZQDIf|bt7?#Dtil6&(bUo;65gCvh^18B&8aM5AVvAds-DL7J6gKtnVyhXK-<95UTGl076y z1bd*9@i&AuZx0C)K@R)|GA%&EGfilgwZ>~SrUl_3)mp5&zdaK|#VZBvw zoXM`?nh2pr&+_*gwhoSR#9IeP$>S}96WQ{ugQGP1*1@&bcH)~LTPB_O@<wE=Ej`8-zvD4OpV4U!>Q3Xn?*Ao=lW}Hqrved01X$dFCwRa_$Ld5gJfAPT{LC+ z?tq4irYv77&~VMHQ`l2lwGQCiW4>pgMQcJ+bwxm3HqdZQ^n-#^Hx4vh6zSBp0}U5V zI=*e7;i5^$R}NYQ$M+6e2?x%i;iB1&?<2GdF0lI^ttMx}=p-%x literal 43993 zcmeHQeQ=z`dB2kmTmFbK#CLg}DBw6o6~uAxBw4nJQgqgxWEq`wmSiwrIfU&J55|CP zz7wD{anjTyT*|n_bt$DTrLOBC3?($7#6uZ|dKiXrJx+!{JU|(bOEc7ev`mIcLx0bH zJ^SvvclO9jMsv83B)cYn{vKKt>$yU)6uAJ3%|=|XzV()2uhnUQX54^mgGK0gtp zl8HnjljyWkAT0h?3x2mgNSU+ovk5<^TN8=-zm-T(HTp@X(m}d?1YoH|V(!v!bSDyz z&AQvYd**b*7h&q-T?| z3r22∾-YL-_d@nClVz1#~9;SzB?+2~t#+B;^#K=X6TNTU)=|aUoO}tOrgH-G4dCWGQvuS}E}0}+!NC@vw4^Mu3bhAx@`x{6ipp@iQ3@YR z{6TUq+YzV;fk#bER5mrIKn^=XTYBzDT391B_ry#A6}Cc(sDb!RvT+JfQLTlCE*P1C z%$}}mVOIK<&0X8wv)mQ3H_#Mxf(O3E<{U&`m1%jn-B0V=f zpPWSw8@=<8J!1hGy47u^i$VNt{I~`PQbY|>luxWU1wy!$z)Q7~?PzlS^Blg6$-7ATooj|AW@E+M=R zyDN&Vy1Ve9xl5^evRV4w?Gvu0I~TNMUDZ?RMd>BVa`XGB|K?V$ST6rA_t`H+`2l_} zU*%Xam0p?dQ&8<&X$C1z?cB0?+s;Zh7fN+`)SGR}b_M)B7h&OT3!j|6b`Fm(BvV@a zgc~wQ5j99rK6x&uKnQ#8W$9&VE=-rHFJ-em^k>zoRrDv9qhU8RDZ)uwksGw4Buel! 
z*nb&DzX)7cUZQ5jpA|I9MBuV$4!}{vI&k)AKyHQF;a(-y0X&dW8JWTtYcvR(jT}s& zh5TPmrHn@93|iQ1o#_re1E$&%vyw{*kIIvpkkXm{XdVm5B}H>^3Iv`%`g_durxqty ziz7fE=UXSSGDP09R{a*!(dYpgAW0$Sdfrns^ zXqyt18-Y`yMz{y{&UtyZ5mo)7Qx1>ToN{=DEkSc46^)860rZ^7DF-f*UXt##{U=LL zC(C;e@r(zTNxbk(ks>PNaSCJ}t<2-Bc#3Ijzvj9(cU(S4h!>vA+jLG~zGY^2 zn(s^nyqeo@6f0+U^}S&OR1agfY}gRs2QIF@F~)-e!yfICr=J5Ig=s!!g*gQ>D@@~Q zPr64<9-j@!&iP#Sykt*KeZ^WQoAo_#%|bbu8JMrp6=Vdwi^028eb>Ev6P>g2IuqdP zs|$V>ck`}YyFI*;-R@eStYWW2#Dx9s+gHFiiNw3l_PZbfxw6O!+H_eopNEQ4Y<%*z zY`ROWX@k_h?UzCx=j+jXc9{KqGs zd=k+{^%cqku9ZTCu=Gua6VK>kgjg^_BZPRY%+WXTphk@svIHDOH=d;@jR|(^p}P3I zc+t0S`PEM@%SZHW(>cnXC{6Z^Qjx*>-w2#@o)}1g7~_&z_NZ|Q%i(|h&WSEkB|LVt z>72lPqj4!pg*>5g$$olim9NFZ$SI=YeIUISn~irFCgmdaq9qVx66kem>xNnWqi{kQG*oalLK=KvT_4b_p@KGlAqCs20qu*&T&3UT z*^APP)!=siN)j(mzeefTYS+Njl}CeQ9(j&Tww}lpdeUs>A_OU-D!EdfRtjW=`Hhy= zB~-CyzjdYNQONg3Ah}%59xQeLrL{Sng}pnbLV6O}X#?4clt&)UDG;~~FqB3-oECNT ziJFfeYBDe%r%;&9cK{<04Z^C6Td)6o@0pU`*Jw@D&@9-*Te-G&G6XqfHG@$e8;nx; zu)$n7$|cBZaFow*l)}e`OQe^kv(^G!y#uCVnHl`o;u!uf)_8X9>XC2aAIO0c+8kLTyJmYW*1~F(IO}vXUZF zKK4JAh?K%dHOcMCR&096RgiGw*-KR0O1Z)fV~i(_2nTj301i#SwwO17v9hEiCQ*YF z<0yymtgCz-7J2<~Wq&t){%L(bisGb3 z(KE{F5^6+685$L$RMh8Du74o}x94k`y~KhGf1{4da~7GGQz6VkfQmJqC4Nnr;Rb+2 zo(3Q)Pw1S&H_O>-3K;G%+Fa2NBj;nvCsT0>1fDkM!}|71nY%CB{nO9DtQSsAi@y=r zHk}i+>9W+yQEG^dyF}SAr#F!-^pb&`{zKFtMfoH(ryRJmSjg@PsM&BsDA)WoPKsuq zSDj6x(-S$o{1<;Ax!T)JQajQd;OrHVa*0IItYXmTuyGKjf}%E_2nZk1fz=lCIkNuW z?@={d`Ttv{W9(~#flYYB9znPHmt}geA8AWA~bh) z_M(eo>Y!07DyyBnq_g(+8U)g)UXJ12^mv;Jz;KWEN!Pm~yJBTF*Bi9yZv>{lGu4q! 
z7l67x>E60ak4N}sqzh49>5vJqY`Vwtz!o+n**y&;|5lOx z=^tp>@y*B{qPnspvkF)up~3LYfQ6_o7&5DZHQ46Pz%vje2Ovw5{(`$b@C<-;%`W3> z?ZlW-a?DcFrbd(;tEyyRzyb@*rbfVms$h11u*`4SF#;xfiC4{bBU!s8nN5v=wO5zS zfYs+1O&?vH+2X&aVY3wdhP}jJnSyF+oNJ*>_=!{T6ZEqnMZc*cr>u%Zj8^;*C4vJDazo;hDu?0qEeVFTr$_96-$#&DWagRQ)cp-vPGF3pVUZmiQm}xP-S4GNJy7J zSs_Dte8Q1oyjZFLr;;DUIdLkJx_9%`cJx^ZvAVK)!ayk6h)ol)`GJATFEs1%{O90n%yq>zPH zTvOI^7!Jev$q0wh{BR{dI;uI$gyAa1(RDgKhvYA#45TBP(41)N@`H{}?-XjKf`Y4jZXursww3{Z=<#m-e@QTqgP3fl2#BsaXpQ>~h7$E?3I1UC6Ick2DPPD@XW&65%&-)iB|yel|W-vgD!E5KTS0AI=#c z8mr_d)?3;Wr3meL0wc;GJVn$JtxJ^B@MvW+k1>lWV+GmXh9V}gP?^a!MOjPO)l7rM ztI4j0Mq`KF@2PbQ(=^-D zH7#EnG;OSDg~>sKN4`iuHlC*0cqH?hG4y+jucoPTWT$j-yi#71uMAeiAMzG6&U*6J zO<+zSy_~#(a=ua?5c9bQwNM{EGKtpd>DO5tjcs-TYO8yIqro{&C+mJ zjlAv}c|FzhN;UYEYVa%7;8#-oJeDKRvgTG-!}9v7l8%m-WS;g&%WB3WJsim+;dNEo zYTf#Lr6AWRp4J#IT4SOkt66t>cv4<E$*JhHGNVyYt<1EBCKn?Bt=7FYivYoTe_V{Go|;2z#H=T0kPuI~s+!D0 z3s1e$0Ihq~n#id~dCFpChcW$DD)fg&BQXPb@{9+2@{|WNM}-;Yn3=b*ec>;kO?I@k zr&|^-pOI)=7^&?^m!gkIFf}qeI77*Taz$3<-yYJUo{%Q}nh)fbC9Z9!qvS zJQrXbEl&b+$_KXhQz65e_ca0&9qH+0$KFr%^+u!SJ<7f`FN9^@^H^9f%{xb|_s(Y} z=UDHDEM4C_AG%6cq~D|~r+m78G+)Xm1`S$=t2aynvo~{j$4B$~5K?cC4bNU+i6N@U z7f_;#?8nbu05LdJ^s2d?=auZnERe3;QUn-U+SunEb@2T`c+P zOJLoVd7$IOr^(`HdL`WXVy zq4*w%qk#jV-u4NI>11Yavg2q#7(n-VH$C}f~Wq0cE`ch+elGgI{1NhI5-E}XP_ged=CD~=N*~U`%DY- zmCut@9CGiG?464|D0l;Ep}cdEKrxZ_9+Y2Q?C7W7nF93vQeP#|-oxXgOC9^u0s!x$1rOP` zKDyNS9vW62FfuLdv;SjXvM}i#w|eJpVLtXHU~-T>wJE23n9nVBG38r(3-h_9z@+mO z?0cN@VZQZc2h;Xvsry@B1}1wj$;l}n=7-B1Oto`h$@$?jV9K7R?a3)0=I*S6Y4;H+ z=k6>pIq;t3t9itw>(&@__5k0f7Pj zYFDG+C%Zl1x>rc>liiJi|Dwk+b+kMDq6emygRgErPWeo|x0f;X29Gu~p0W1!0*}M> zsp&Z7!+U;(oOI-KC2tW4Oj;s7zrtOde26OaC`ykedI!l8;lmXWVWa3dqzZ|^DW3?t zS2=ik#8%$8%HUBb2IEo6hj;LDOC@U;OUNi6l@DHyHr3HU{EfUZN)37A8(*=(c{|M5 z>y59t_S)6wse;`sQ)O45TjdCM`9k5fLp=_}gPDLpY-8+ros`DEs$WXJRsF!MQI2jH1dj`mB_iKk#| zi%{^8PYy@>@#GM>7QlmbEN|(rwiYe@kL97i6($LLa>}RwnbnqT*5;(Zd_g?*pIJ@I zHFE_3NAG5HKE#6s2a&cgumxoy9xO;i9GIsMA*!!l#P^Gq>k*pT7!`=ryWcOmuJ^>C 
zEd;8leN$0|9{h+;;**^rgpw3Gd2AuU;01uB3*7$gg z$%nX~B{L_J9UrXGJVIgr=q8eLi0W&M*VnSUh`m(O1N*?j4zkOHjK5L#D z3ppZo4&`3B;6aXfYRq-SC(Dkp)KnZT|~Qj5RdYPTFHl_@)34 z;?ebvl=K=N+uN3PkFJN5d_e$Nms39Lp1#V#RL2hlrhNQ5eHAcyo+3<6`7rlva9T_q zU=WznIrnTpi+LWu;G9vakH^Vt9J$qz2utph*BDRJiyApQQp$(9>pCCir2U?G*LA?8 zMGVilobq8FxZd%CvOJa{2|3j{6@f`}_VJCvn$=zx9^2%|q|VTY`cltho9fi_%w`u; zo{F(B&uliBn8foTnBkdsqB%w2a`v%Yn( zO<>Adf7e}lfqyPR{ht1F{BD56R=>`(pAq_6=*SF}u;ls|63a8-45>;DOT^;*xj%2A}j$ zeQmz?0f&b=aA@(^`#?+{Z$Bt`m^FYpz$kdoTIKBr-DSgL583Ys#g|xWZeI4zl`#UL)sB#&QZo!cKKzr zpI=bM$mqGVz$4>3-?3%X2S~LY<$E`j=?jb%t6p5-LErNSw#*t`+f^>JPzEydl?61U zbIR9Rhrbt=*&J}S+8$-j(XPk;Ff5})(=tl`L>b7)7Z*6!74y&I@{6O9gDMGw~s zY2~XMltwp?-dR9HNtEid$!pV=79M^$u4VZ$N<$x>ug|ERj~>TI#8jA(vA=+kWt9b@9{zW+NWTy5b%9(sg>nFCT|^diJ&O{c8}E zj=4ZMvQP2T=CF<xxNEs`*2Z&eOHA2EV%zq0Y!se6{zqoBT7Sb>Kf;C zm4wach5a>TmIvNN8<5N>TTA9g|ExNcIvsDk0cBlpc%w#jb$s4bM_IS}t|zKpoLTm8eD)y2tsQypd9 z>d!n`gS!j{Fx64kt^Quk>S5hc)~&w(FKTfQ>yEN+^%FI#hjm9;xB8x^z+J!DqcOtj zvnnt_=uZBRvhe@Vy&ra0`Z+;Wb0HbUV9bcJt|U+YW%cSR(7{wkS-1K-RNbD~Ek*3P zYg=Y}2g=e&kI_Q@roe4Zg|@7Z9jI;}dkT3hnRy(iv=0C{a>)qbBeW2c5B{n;6BUhN zR7Y7?^#e~=x10*sFx64kt$wm*brI_;s$-VKaS@bmbgccwUsqR?p6&4fY~(~)SI+02 zsjjAqA2HQY)~)_?s$RuGREUYFP7Z>y;kgzLLX(SJhlSei)sQ5372}&9!_Q1hRY-)(fsTZp-k-=mZ6O=Se>?>)QiA@bLdFV$L6Kg1;=~+dl zS&UHHFgl0Zjcgy<_hT@!*GkbaH^E2^jz1%zxzWn}gesifov4RX41yXFT4?@KhGDJgy zsJnrhfP&F1CW?v_WR&`hVpeGDOALY5+_LRvq|dQ&$iY?LVUW5lD|WVItX~WiitdY1 zeQeys^D#=j_81#CfqadFSP^^-Qnyh>0G_-Ric#v-M}WvEJ_cz>6cx9}D2<4s?y8Cp z0_Tqpf^$PLLFDyW4APKgReT?#G-6p5-OX%;$;CyHfpIZWR480b6czjy6D7XOSpjY=OT8|qL1MX_ z28rcz8YG^}sk;W*Aay&fbw44a)H~Ly3k?~dAu-|&Nmj@i%Tcc)bnHgmv=|=*(Mj<@ z)Lo7YQny)Ez*!6gE5wXZw%%6Hla3WZ#t`+kj0zoNfVypA1&c9Cy z#cVN1Lz0L~X|0E@hD@vCwpf%#^ihFX7D!!hq{!P;92P?~ViFa4#UKrtL+~b4YV^*WH3_<6cxS{6G7cT9Sg;TD8)okK}j)DR2WiB6m_FEqtrVe zis&U7SriXMg$~67QL#adgP0ws;?)dNZ%-x=72VS~2ra)<_)g;>Isylmn8yb}7!QNg zw~?o#Y!GY05cQf>gCJakK^m1~kRckE!vd*K4jmm6%N`mehCRs1)P?bl+e3rIki#1@ z!xGgyC`I8A62owH&tzdd6#~O}bvsZfLP&+gFkC!sRfr73#ne{C%rIU&ZB+~n!^IR& 
z#mq2XJn`Zm6bSnY^$o<#uz2;o(Tjq?ehe1Vb}9g-d3Z7+rYU$$nTTON>x{VyaAENp za1{qG*3N_@oeX7ZmUJqdrCD$)yro%iD%_=6a4P(T;Tjl1t(H~)WkvJgM2JZ9;6%ts z)8J(IO!MGGI8F25nrl1xpL{e;I{E*CFkHM5r6fae7%rY0tB{;#!8K)Sw0ap$jiK2r zp7B%^4r?0^PDSM~T)ei3eTE}ESr8mfD6@3&lvS}g3>QyX6|2K=jjU6Yr!;FFpbmno zP#hMm0ZmODj0w15xCYw6z?rZbhKnPe37%oNc+#m*7>0`{oeFqq7Mu!eX(k+;h2i4a zP6e`Ni*P@H7G@cMp(K=#!L&ZDYLbO z7n!YN$QQ$WH+(Y@tX1(IU%qwwqPsS4-?8PkZHscta@l3sMV&X@xn=97McMA$RB!L{ zo@{p2=I)zTc4cvoO|EMyx3Z_ZXXEmfT{mr7(X(+y_td5IUr)6s62JNP8w(f`z-XvvitG*k^?04>R=X{?!8$2yF|P?I_x>Np*9V;sx0{*jp0>9hOW+g)}# zF!Rj!c|Oncdw#Eb^IvHG`h{k4YbxB#)WZ*$d}{f*BdIgi#AOM~VQYR#;xg@W7|Jgb z?0wO0DIj`KwD&}t=d2RA5hscNOSIm?t%4s4Nxi>|Hphh=j??3^3Gr!O<9N&5gbTVi z3-LiWiGMEYBoEO)_Fx?Ompui7z3A}>Hql!u*pQdR!At6X<#h@E^+m*X6cPWQM4MJj zbgO7TD<<<KgHQN4in;F-INu>iJ59f4+IiE9Z z&=`}ALzHoZvJO-MSCN}JUY%~xFh7(@mv1}g^1B7{1Z6x;S;t}hPMc-ECf%S{fwvDv)eY23UjHse zGf>ZWX&rAM-UKqA0H^G-k21cAT{bv?U6ls6cj_E*$SM=Oi}C#3 zDVBl#@wl)&0Zu)af&Khkbp1_IGtu3QroV}AMt8PZ@-FUXsI#D}uw`Cri95xTq3tDH zAP`cl_%*1kuswUPRR+HoEGm`OhAj3yn5eK>2ij$b=AoiskpznS*Y1GE^fbuWlcF0x z5XZr>he;ei;DGzaH27IXitc)c6pHL&G}k*|q&7D*)*1I=x4|Mm=M6hm^<(g4k1eaO zE8U=r?>Y~bmiV}^bDJ=&tCVq;vi5aB50|f-KNKe+TFFrJFjHBe8+|429OM|y=qo&# z`S_=Cr#N`H*dx`9QN}MR>tpEV3Z9yKRk2OrZ!jj#z3PCej2w7+Z;EcHJ068~dzr+c zZaH+jhlKDNME06~bi2o~F*$)rHzeSnCLtlgdbF2{Cu0wV9v|m+xjAR4N0`?LWgMfd zM`2;_b6FRDj-+%!;-ouF$bQOrl(Jrcbyarj>|te1Pq~K+`Ae`Sxk{ww>|yAsS)29f z7s{e>)E!pdx<_!c%1%dLlZt}An$bjG6AE1E`~L;JisN<#I;-vUonOiYCaW3h4qUI^ zL`U9`@*%l~p(3!h#-26tro{4xI2huJ{UOy1CZN0KxvcrOq+~N^(24KFg@FWK{@gH zeXJR^I`I_9i5>eaTGS3`Tb>3_ic<7fj$%+t?iNg7gJ18XlhBE8)yn!5;Id4RRi8pf zhU6NqdJAqsgcH&$3L55pUctLf;wn8NQgw)Ecy z3l#;nF^`^!N`+uwBTLP|>y2i5@Het|s*yFLR=0mcfh;0|T8;FL$W?NG!kSU5k;^g<62OTk?{>`JFsUssXA-4f1|qOkHI9v^>tr zR@RJKjXdBHaiW1%R*TvJ&g?XZwCa-=&MXJo%ua#t;3j=+T5>aRbF=BG__t=5 zvc{%`E)SoVM&prZAhkoEjZQ}@<-&iXeyw1ae2N$39Po9pENb<_9{;^OGQU-02EAhJ z<1p2cmpnbQgqa*OKyoKdO~b0rJo?5*QV_V#49$&?_;xqk`&e=_sVni3^MpP8BUkE` zA2|H6h4Vww0b4fYq$+kyLJ@fnFIFej 
zXb822Sma3*(GVomP)e0(OXz5oI{AY#q>>q`*0D4-PTHhN3+bDk+Lvedx3_z@83vwt zp6By?pXc|oyZq+1YVo#eavDk9HES%$X(RPdQ0`PV_Y{;%B)qkNkIBIf9*Wxdgj9-!0hke3Bo?C%T{H z=Lzwr3yA%80g2Cxc1=9pKRGK z*rIO|d!DyAcchzeWp11oxi{TpU3YmO^R{9VKTu5SZWm)7IgLkLzlYSH@DTp@9x|uP zOZM-CSB!f-g5Fn4&|>|IC8U13gzV455>i*`BX-6|bYuO*?(t^}_2YibV|{dh)H?zs z|7w8L{Y&)SrKI2Mr6m7X(H3ka+>f@B`2R%fFC+S|mSG(AKPn^o@#Ppte`h)RXit=< zW#GOrCfe(FagMwISL_JrRfcFoFJ*XzvL3IplN{Wo~!8UK@)0T=|^CSt#ILhJ)6*-Eg4dDa~w6P!=Y(F{;@b z_+9x1&4;zYX#aLb^e*eq=n8FLt7D$|m#H7mibMf?Cla{^v#<|V&;uSF&w^A)%yIkNBf zTI^tskAsCuou<1jh{C6pOk{VP9hxz9*P+vlJS(LfI~ZyNDt4qL&a?-EIIXY1!@2NS z1KYeIL(IU%9cf8R9SSf%;E?7Z(=K%Yw`GIoQD?kfBOEo6|D&+~1CYSwBrP9MfPq4n0$e^I^qGB5I;nR#CUW*U zI~=m35Du zn$T3msL{8>VO9sPRq5!DUyx+rN)g_h<0_o1Nv9utUz`x%g}<#~&FCwSbf{14gh+fX%c8GD?(hhImi$f8!{Gt_ zTIr5pPKo!QkeV`(y~${kV9Y4tc^(el6FsasttU$*n=udnsZEc5zh4%_{;j9=vLxn| zl7kQ?dHu8=`o2Z0F>Hs{)p3xTtBam~1sS#YuGT}$pdOZy!LS{?Nhx5f(`kE7$~tg! zlM!~+(NqsK)}_;Hz8Ox5oCP%00Su^^|Id6m@e@6s;CGsc)*|?WAqpw7!hNJ;uWv>o;_C}jA zro@)Rv2{;#?qc#i=HEA%RThlD9;(Zl(N`i5!VulMjbWOYRTc~l%bG0>tQmbJawi12 zfC&0Zr2m|(@+l(dE0OsSWL+a`Mqi2ajGzjC9q=MGG_rWTUT!pN@AJ{%aj$&c2)}BK zO2DA9`s^Da+~am5Jl+&##-NhB#dG8VhK)_E8hv|za-1>r)jm5aPx4Zeg_?znO&Rq2 zKaiu#O>6@CO7X$-AwwLt8^0nw?wC=+i!uz&%@#HK@*V#3n{u<;&8!)HW&PNg)C>$Q ztQmbJa-T=U#g?|P)P303V$&}EP|{`~Cd&vHJ85bW7Fsgte~inT`ql(B`gT~brG#xy z--J{T2U;yu9}Kl-(wBZLmwwP{`2zZO6R>SMFt=H>^B1HV1`cJ3FY&v=OMD($+kEu+ zq+I`Qn}r&Om9|X!owwv@#6q$l~!hrW#PAZo(NEkSp+d=ceRpMMu1kkD@(HW$NIN%Sj+mcW%OtL z_(ufdIL70+1{?w&N;JgdP?~x&43jV^rHLmY88`J1m;8)NDPatylqL-IP|6s_VS3KJ z_q}`H&uVS$OPguqLE3%y-1FUY&pr2^bML!HWTNds*(ImsRGVBRyBv<20woTIqpB`Y z8wmu%HPRAKur44;k?J}uuMJ{(C>WLUN04X)K;V^s>ge6{Yq_#G^tl3)?sH(EUKIyx0+5w%8R6!LL9NzXkAH^|Hl|pnToxK%G`K z1l_5SSLswzgdo9?LOugQKrOAOU2?fg%YvP|KnP$PkhgPnD6WbIheL7w!=0csN2TzD zLj9DoKXey-haaqZ8+@*UA7uCdJ{#cY68!uMel9@S|Lx8Zb9%;LP9ZQwR6UqeC{T^x zKzBNG3Pu98SiZ#T2+Lk?C>RPYQykSR6jva8t=CfrW4+3YFw4AN#E`wNn&666o|-`2 zGNn8S@W>6ST`XVSCOgq=+@%t<5`gh6m6vlfP^19-2O~*7p@yfNj-dnR&i^iZKn}No 
z5Fos=c`=9<3IxJIoJ|30*7yYdSuQKQKQPl5^Ll97SPPgJucG~u+)lK)f_mnVe<{g^ zQFVvH!APCr4pl{}L(txEgtq5K!NY-Y$X(*Cs;jPn53ER@!jUBT0UpC7y7wx}*$b4~7UrRU}efxY)3^8N~Md4;rIK_A+C1dEY1|Ym43%U0AA+ z-?+B$C(z-%b{PaSD3g8M3=}B{|M^NQ$tSRvR4jtuZl~L^2F8OM?jM;dxf-=AwZ8GsiP^jQ9Fl_C+!JMtvWy8!S4hv>Bewz5rl9*fs z3K1Z`d}SyJ{SHWrSAnZ>lsXpSBH_YCf^~{64iIh`R~cuZR)HVr5&LG4XWo^@+*u6L z44n-rQV5qZ{4{{;mlhPAIDI383riL98-Ro-1Drz+$u+u#&lwCt@37ewTi9}ki)0{- zzk@;YJ!IjOlICtC6)B0`z)wTFE;Li-=V>rrRYw{?3pV<8XH}iz3}W1F@aL=KN`_Gu zM~4Ypgg^kl1na`pCKG@t!A)u?ZbCQX1>^wRawramBzg9=H%P^+9Y5-5fVvRHwl{!u zx;+M4qoD@-26e!T(KTFG3jBQ6;qp{i#UnU>^v;obGziqtvjIe2hj<0-pk}7M#j8k| zh`(?bWz@eErjw3q|GHwRHqXjo9$soWQ8acVyfYet;l< zm4Swz{8#1XM@}Rg9B#>R`x1|%eu*dUa6Ix76kp%lu=9?^|M;zj4jiO%xk88wJZcT@ zQgM*Xr69xA#L#`xBKINxqmN5COk3p6Hs~$L0ZWftT6Mc7hkL2(4=aP`z-XMvkRzbb z54T0=pcx}{jrSfNbe3dX^+<@V$94t-FiN;-VBd@w#t9uz*9waf55S0%y{BQsjR88Y z28OkfxIb7_@DoH}anX4PS-s7Uz&xciHw0bLz8gDZ6vPZO@63cvH=A0e^-ume{=2aO znjrs-GeGgEHTd}8sF_QFo$Kq42u5Xh)Xtos&#}a8XnI07z8=BvoUH%a2NolmfG+2&)PD*J-N`tMCPZ$i{oHnxK z(raKRaR#unI77#7^bSGdB87fS)=Go>V2liYZ&1QqeO!;!$*?U!#6KwSe+uDEW4-xwP5Cn&h4nSc~K zkXi#jNHrsD2K_3oB|t+2{saXY0#cE(nIhaNHwrR%z>!b?{#$)f(#0(sznO{xl0_*n zU%!XR^x*F{RNn5sJF4;mjc|f|26S-j+Z~}t0sZGMzdW?->W2urxl}~IZ~3!dN&Wn5 zQMH!JX+~dGQ~$&A>pp@(`QVqyt-ZJYeFMsFE)~&Ls><==YlnaT_4vP7jLzfT66kiv z2-Pir0yIsxSc*0N<}orlBk}~-Z4I1VbgfTs?P(oVyltCy#&>N`jExVCX8@q)*G!do zSivfJ;j37!6rR(qJG)Yosj=4S=C*STZ4aJ40CyEP~1TaY92?~}*NF#2t zA;o&Yos!!*H%RE|pV{4&XzO05xEfQ9DX4VQ6HGL;?$*iCsi8#oB(57s$1;_}JRa9S z_`5fM{+qACXu}xx{N#pyGTP=+0MTse>4`gDT>QJCs9H zfx&@UoDa7?$3(@Q1|j0fBqc9}Z#xswp3$bhzOL?`uJzk`%mRaFFZn>)3<5*ETKZvK zoCs_#1uRYA<(;`fgy2A?;avLF_!|rrW3WMS_=mC zoB!VbbzH6uOI|;;tGFHldxfd$;!l94O{y|jtdN4W0Aa0MukY>nwz%Tz>D@R9OxN;d zJPzg9tI>%Rnk!ZS?`Lu9sgGlYb50aVck0PvS{MmDw!Bkr&W^-m->#y9lNwDtB5_s;@r-Fb$H^T0qxb2Fph99q}6YkUA2 zt(POZMi)in3B?_)sHkXyTG!EzmV@i`0HLIKRScO4nW#zUU|@b+_=_)9f<3{^dGp}! 
zKP8K_xfBqXJ(W9DDlGjkJN{}#ROJM8gFR(|O>yCPNhK&-Xgu?(I}3>d)ZSkN=>p4k-LKGffo+S17cE6-E8QZ#P@9v$+jl)y-D&FXLV*5aJ zN89v<9dUrU{aq$Dx^9t%lP-}N+qOP7KH0UYxqWiF4LhMjGYh-|sxVg$mdKx=SQ8kR z1*9MxAT)@<94siFy%WQe?HjvcfozVo;AE``Fd30^uSar6SPh&FSO-uH76-O@l@dJy z76;a;{CL#}u{d)n0BH6MQ@efWt^1=YhguuZjX}}U-ObY*GQ;EXw#Ll5=+ru8Y3Ecj z)xLe#%-+V?(QQ*hfXm{eatCVCP#=p79Ju@p4h;^?nmYq-`m49U(gU5Ts;c_WAATW3 zI%6&c1g$eeLqqR;{8lr%c*3i40(wU7)LDpkXMD#v95?gpgFIi_?} zGM1r$z4uP`b)lt*EC9J#-lBsn8Xw=WW9ts3Bt4Yqn~XQLqe9j^FPK`#w$8zsuFk2( zXeNUfFSLq9@IblEihxc%?ELsJC<4A6J2ADDD8gI{5TYVTO&HDL^n?4OYHdI_tiz>b z74?IW;VR0X08O)9q#|VsJ^^7up4Ro-J-uV^cwAXB7;j7O+R;6`ezvV?JMc$dXI)2| z*Aa0!9`2s%Y3pq6Y9H-xY;SH}m)Y2k$|hLQNR!;GZ)9+9YEM*I)I8J=wQiuhta1ki zSnHF?*>tRbOuA+@P>D%lT>xp7Q#uf7kK$?UZP~kH=Z4+Dc;itfAfl+{INkQ(;H;nA zyJ_RrR2==PmH&@H9p?YRqrwAq{rD|rh+m)x6=#TFuv9@qBdX_5@QuYU;QTOt8Ivb< z-EhscriPSjCOcE()9LuOuJx(uiN?n5Ly1vft$mes3X!oQYJaM0cCsn8JKECI+PD+7 zZ^3J)%=+*R+AE~{28$i|yW>v9^@Z3FQ5%1PMl_4XQmpYo)3XvN(17r_pdrQe2#gfk zJbw}qz6VsSfmAy+<6Lp+fYj`rn&_LnH#^n8eQ>l-J_u*^-W#1taiTMtO?4%2geJ*z zM`DBcd{v@1k?lygWTVMA)^P%lRu=D!-{_@$T3IyNWKpk4tyi2#b!OwyPPM(O*mJf$ zp_LW4L_2iPsc4J-xht7Xq&BD+#i^Ed9laybD)yncBhjm;r`uH-us^Q7gME{<8)eB@ zI@^@&=!|8v(O5V9Xj17&sfm7)?`>1x-_t1x-_sC<+=|Q#58JWu`eQN-1(>I-0Zb zq~)|gQZlZQMEh+|z?ALP%^DH3y`w4HxiMoUN(W5VXeyP9luWl+kpL_kZP!GhZAHnv zZj85N;|W7soFvR$kt@0`fntcu1m%JH*;q@rUInh*eNk6BmX2&oimdrYEgCFaRQ~*Gvb1}uCM22c zo{}u>zLEJe&=Pk{RDmKXLEXEN5KlE)6H<*jQ)5s`W=`A&Re~@Kbcv#bSUS;^1q-5$ z&_#n%O#V_NvcM&~E|JE8M@KG7PU4mfbO{H94iyFEGtVfYHcR*zQWjyMH1cHHTe4lL z7I9giJeJEOC$VMH2#3pLW3%pvh}tbKp1yjnnDdua{f;?QdEakGKWQfo?Rwl z(Yd2Z^X#(AkmfsO6oJhNvezdFb57p{$^V@)lt!QAAr8yjazOyL4rl_*Ex!x_o`jo< zIK!y`^e}-Sz@!B0cv8L`>p1oqYOlCVkP`@ka76MwCi#zj1`L(IUG(WQEG(3IQsI4~ zQIPHogFx@`?UVc`73iNX;lc_H)TQ=MhenS*2wIJAt^eeTddSESb^Er!DQ-j6QxU@Y zl;nF#^1lttRv8;2-2Ts&%Zu4kO5HQHZ{q&;b@|$^(zw5Uow4n=@^XhY?zc?bbJyoHhm@^GuPP}6Z3)3 zdu(e$R6xDf8Gk*=8ZxI2S**2h+(acZ!m^V`0!uvB8#k#Qxs)XmS)osz7e!z#96*+~}A=WgZQ z6s?9GJrL*---Q3%tr!KU=}Y3IIlabOIJQgVT|K=9x~j`svXJNWZALVDUd9fC3EDo% 
zcS!P|z73-{HG9bd>TDo46r8USs3#=famjx+Kq66H`jQ3U`#~P%P+d{k*pV{3q*uj*R$woObAv%#t%1E9 zz6`K4)f`MbS*6Ey1dA*OXR1-BG`Yz_muG4y42I_7*{yb}PLObNovATSt^>7P1aTbL zp*!KEHgcer*a%C3vJm4)gb%5BUNu(;#ChngnX9hUs$=KxupSZi zYC;&1^LN1dYf5&q7@2qP%mGPXX$X+--bn_TWj$FyUAT)w<$eW15Q*ttxC`ciE)&WM zw1I<)V=21Z!;Wz5xmF>52>?`R0#&m`X~ za8?onBY@jcB%Mlc1m;pJBX}iFMaL;EYJ|P@Ax+_gekE?4(1#N_ zy$U0Z(b;~s6&j;k?uE_v!)@F~bqXXe*GNlfqKDhSM0Gh-7WRH9$+1ME5d$RzJCp=4 zT}G7!*ufMB6Bj-H0*?qsCi%e>@lW7816D?IFwLPvxx|p`G08V4`46V`A(uYW%0kK$ zncUED;!Z?7PmZ8ZWXQ_K@~JGKE^V-e5@Rpa-lYwoJzZ9n1=uI;6pYVb`l<=8I^g?A zHm-a8pR{8r$n&nmVDN(uYKPVs34-x$$#+unf6$>>p_prxBLhopW3G!IAk(4Ya>9!$ z(gwKy1CoEP3x;L@y_Z!&mWX-0J2xi0;}S5BcLOF*%7RUV@T5Cqd%7owF6iqqg82gV zzdfJ}HSvnf&H0kHM7huR=4ykyX%pHw-wSQ1=~sDEx5PH)Ht{yB-Ukbf%x!{3)NHK0 z8C+r`uiniEfNrA=2vHw*&L-oioPPr5;PeKR*|5m$IONaX*i$J}B62*P`9xkqEgY0cKf!yQ!qW|f8K+v4&Tq5G}EEm-L8k_*aW(C%p<5`r^nByfv z{a_0pFi|)$%%q0*gDo&#YL-^s3@@>bi+x;Nu?;c&IU)I;k^C3?P+mixmk9M#zqPpR z#i?LkPxXVi5R6=2ewWzBYXjEevbX=Fjh9In^x6Q5J8zbk2z+uVH?Vs0K*#Om5CZGc zx;gx z2)xA;BPgLR0Zf!}Zqyn`^tD@kL;kr@grQkrBLDue-28A|gDC$U$@g1$KQe|0mz5MI zHt^oKu>txDUG(1%_-6g@jgyfO(!WHMcP5N>D56jgP8W}NCcqBWtggJ7U}765CXIH; z+t7RP6O%C9JOd2vkU#0{@R_OHn0gQX%oJeqoG`@XPddz#(>Ziu^57?@K^JP?SKgd4 zQ5Wyba`nTaXGOY8c~zL@+Odpjl8jkn;-SG zbWmey?FJ8N=R?W&2^{b4K_kqcNG7)M>MS2(v4tEl!Ym9k&k)05=1;m|e*Hdc$&E3> zYm)B`h!O5X$>+@?6NP^9el9e1lvFKZq(Il)7w-pwd4?DY%%5}upL&1`Ok)KV6E7|i zEyK~n5y^k*0hHC4KqiWJ=1bf(MDfhA!kI6DWvD4&dDFuN{ z^QV)E_;2qAe0=@PcO6t8<_H3+LNC-Du)O(WVk^fF%Q2&f8*P;o^%WN`>Q-CKP5yYd1C={IE;3N2FCEqV0W_pCkI8WA@*!ABY6$BR@QHo^}36ewI zE0lcSqXN2Um*U+asurkvh2Y1&&cKap47-O&B&X{*R0VLJe1=BPpL8SnC!`@Nf6}2o^>_|Nu>31Q%TV>o6ybaBamj~wiIkhiC%3KXo9Kf)wKXJVZOzoq zku{LJwsU51dd=kc?lq%>(|rSd(|yXD$@^AL@0}Q2xpw8$^yEzc^vcyMhlU472Buc7 zz5BkEV|}AYV(u1byzcPWHh3PJ85!x@Is%n|dvNti6?o;^`|iJgwFbbhT`nAe_+*+x zS2zgVwd5e6hQZ742$!RYLxAS4NE<3hyi)+VLOJMN$O2^Gmnjzx!q$d`1F)-<1pv^S zlm!6L>yrxyV40GI0Fczm0s!ds$%O-8@+RjV9-AKAHi!k6Ik`#dlJ2r32fNa#A!5<1 zlD1%}mnUrj6E-y*Z(w`|m%)W7EsnRNG2jkzxB~RX;Q}_}*)3nlX8EdY#tYkwS8X$1 
zjlFm_4~Na8Ve??vJQg+&MXkLf8?hIz&K@|p>;ea%wqU8ZN$mu)XF4#+?3oM<%$}*h zj(BWQ8M|_74;H;}Y8w_D*VuxEXDBTH-X1)f{AUXmJ2kNl3(ie!!D0s|1!C#$nc9X0 z7bk4NVs}hA)`CJo+DCSYlw&O@4&+#HzG4d&y+UdWly;5O7BqIT)D|p$vy{UXFqZ6` z-Zm_HH`Eq5cKMTI6<}s~$_9VP!X_;7FJIULN&i%aEm-WPr!83gq9=zdz}VPLPL5T; zh{DZ8_6I*WTp^;7zY$^=6x`0V3(Cd`l_B>lxnKoopS@JE1&iOVv}~;Jf$c0;w`BOX_JthUT z&toz?`bWliXBW0`s9<=%k7E^}ID0%M70VuvNwMtlm<-F7GhU8WxW}Ye_ION=1%LI8 z+e9JuWsk?Cf|)#K0mf>N!=!TA<1i@}yKiTY^7;Kbj#q%r)GKuytPsm!SLo~kV^`+v z1A`+oo4}0sbS_h8U%20)w6d#jw~kI$>>8Y$8Xh032(AeR)&wdlx6TZY3{(WFf_*hL z)!{&(Zm?==Boqk2|GGKU7mS3f!u{2e(AI(4aDQ!8-_7{f3E}Vi|LMO!1`Rl#fQ*w& Hv_1IWnprcs literal 18435 zcmeHPeP~=&7JqL#lgz|PO=8~j1!Ijd9a~M~dvCtmQD@9dnyi_bG>KB(E@RqGwY%wu zY3lk-ky1)=brE-QMT)r6mHbgeiY$vr=}PgBz{+lMi^$R~T@e2Wp|Bz)EPL+9d-uLK zH&cXE#1e9A z&`9gDoQkK>{iK2Jn?_8L6iw;wS5#1LQi9!D$QEfK&_J(vS_Ks;-0y==RrUpr!e3B) z)2CdJk*Y*ggSu`QF*BOf;0nExajbYk&J)cb=)vto%uJ+O!)>OSFpW^SlPJLj$c(6t zB#C4!27fU|YdD-rCS&cx;i#UDw`V%jsc6a!hNiQrxM7$S)QFn~vcMTx**~guAQL>^ z*8~lk(0x6rv`De`7QbP_hi;%N9bLuSTO>nS*RQATM@{HXkJ87PqzOS7riTCO2JTZ^ zhIGjvXzMU0p8*n}wp)3MGl#z?D@mIFFAspxfQMv$MpS%9Y4IHuEa*6VW6t*Xh9C7>oEh(5uG?I`UdPf6^&!x!k$77i=M%K zY3U<52i`BU-GCb%DqhB|`jL_81*#*BEV19%%IMu?_T@kR(klyVRb{-Ev_RKnEV z{NaBz<0#?uHb1(n;m?(Q&Vb?WP*iTXWkZtRAJy0bf8`15bsOHWrE!2)U^f z4-1M@W6Bs4YR&n_w-q5O!$qMurt2n#+mNSlKu@BAz)e~5D7m){9!5h09in{$)FYZ5RE@ zTBU>faI=P5hbUX)!fW8BhCXT>R~%1WGZil@?wKlTxi+XnqdIC?S37`POCKx=Qk%2} zO?5w->XbT};*jjPql8aSYVfY%T{lkDb_%6Nu1=YnOyUI}ew-lzk;RLzTiK89YWP#Y z%v2Iwkn$M@^nf(g^#lz~D4^*v^l2FQ+7v7l#ahxuO)l27=tpiw*ISb6M$;G?d7yKR z^+nT~&mdW`YhCSAHc*U?JX*dW0oQeOVWea6+JOyVxfZ}u(K@XwI!)*`Ns{H@k@2kD z+%KJ-$O0zhi{n{f=gihhx^6`mwN zDH@PB=-EI2d&7>`-z#RNfGllk4N95TU|y15KL@w(*q(j*{+7T0K0AR1p*8u$0Kk2Mn=Q%gfck_J%cuNSij}IEq0rMoBiIk7d*NzkK0>jxpt1c zy-2(GJN&>Md2l-F#PJzA*jkWIyC<7&&ET5R(qXjduFExA?j#n?R9Ae$iwYNXFG)CN zio?B=It%w`;2a%yH+k`{vCR)&mwg6s`1mH^@M8-m=*rB;-ttbnNcN( zoU2EYTIC(sVS66S4*&D{&eMMw&VC2peePd71X0A2xEg0sM7o0ar$2@kyqgkk&nUyF 
zI^9YxYjSzzR3C*cmm*$kk*7F((H&C_99V0d359a^PVf>6KYZW8O{}Hb**3pa*D#n? zcG8MV0`D>OWS~ik$xTpgfe+n4S30_i%T2JY!z(pjTJ_pmix287)@&ElSZZ^UT&tVKkRU#vSsB!cX`q0}ZOl-j(6Qk#}gTAx+2F6UHTY1)$Vu9Ghh zF6TPMavq875#-&9%4{PEWVVR}GTT5r736F~xj_-jvEdxglGhrW7_Q`t!fAm-#k?)z z{dc?oQ+B&!*6^U?6GN5B$HqidCSbDWN~PsQ#qx+q1YniixJ`w$mBaJ;q5McCUvRa> zMBLovwQ^evhz1`fhz>ko860`kNrH(W_E@5JJllz=u)SQFw4-h(Cm9;a7bYr`Wg)?> zAmRuThbD-_?+}#>mGP}M z{VtAv37SIj7aR-d2$e-{NBU46D^#Faq~&Hh25SOlN<$}4XAGDQ79SC+4FiiZvkE^3 zW5bn)#*3sW*L~Yq{K&xbysaE66P1w?j`?mYx;&dVetk0u{hpW!iE8|wh?w82YFBsH zQYjMbR>ECze^ao_UhEa0s;Xbg>Q{0km|6DFtE(RpB)F+0*CiPq*8Nac|0qY^yE&tYk9P!p_^OS^BH5wML6@{v^gavSAE1t8!k2Mo!a zF-$_;9C=&IzyS?!zzduvpUGH>HC8<-t0(2i1uY{yz~SMp!j~2r|M-N2%f&I`YqI)V zIdZiNOpNw;=*6y}UMlG0@!MD*g{h@sck}pd?rxsrl%2fnMBmmvSEIeOqBd)LsYd(b zZbn?(wrRKhakrcK;)>c)TYIrad+zp?+K$=UbGN(OzOtfr+}6HQqkVGSN^K`>?UU== zZC~WHg~QE^cCkkJjXPM=LS<&NH|}t^Jin4MGueEN^0A&3T4okI*5hvZ>`KbaU}tNT zudSqNTQ@ zmmJ<=bc2bz{j7RKj$G@@;4?()AW;xhudT;DsKKH;QjCSxpKI%_rBoQ37b45RklibW zd0u@*jx0nlCHsU?Kxy&r<*{J9i?L?Z=Oc@EXV5_-dW#o~Z#S^f;o~m1ImTdUVf=Q3 zdw|~GD5gPo7`A2J-w2kWN0WlyymgNlhVC~o%**OAIr7#$kS;h*DFtXJ?`2)$hn{Xx zD5-;yllOvc>|;#c=U?j=^PxK#yIZgIL$~PRrGUsQo7^yDLqvD`80Ltq9+xApYyv9w zA*KN8#K7`Uu=&PNZ^`Pra^%DSB!jL~Oab8BeJlqdMk3s3O#uU*!CCIyeOCVAKEf2h zUe?7tX+;9VqJ;ml4wl2Inj}hNE*Q(hKmu#STrldwoHrQ^zcnV64vd0hzygEfPE4wpm99ThK%2{vnmTl z`(9SRkt4_BFlxdxPXY2`f+6!HX^jTMej}@wa&q6X*9*H6Hh_UzuvqI5UD0%ATm5(Zw4s)=u-f7WebB^ zzWRg_(PD9B3-}y84HckW%rG>5Nci>Z8TIAJ#SFiE361sT{bDFu*}{y_oc!|sx|8Yi z2gIYvs$dxDU0MA=j(q+AjHd9oRM6MO2QfiBzxck=8XAU0vup7|_w1U>F2l(#v0>%U zWc4#SGM5ET!c$ZMr-d9t=0gW5&~c~>xf?+JXmD9nDDQEo9}V6B>UVjD%Eu?WIEaDC zH2W?Oz<4Wp9n|5pZb{qaw#SF$9i7Pf-P?T;fpp;pLMA1ha%-=$WbjW??HM z!$%WVvc!aNKwsEOTp1m(ilBxW6wipyH(peaLMArs%r}H+S4WmdgOVs_bqQBCua2N_ zWF4pqh|NDNhT;Ht1!9XRy#^Z=Z> zK7F`4J1}$LmjnB%vs1gLW~a2JO)vD$K6kL%ySew!?9Aa^v%USjd-hhRcOUBA{L>eD z_fPFZB6>0lGWP7={ZGR8{fDQgr*=-mL!e#l@3nw?H^1=GOZ_$gEzqtWfIItb90aPe z3Ott~`04>@VYUGPQk`u8fRtoc 
z4?s`&8v(#S31|R-lxJ5DfPQqqwD<0xtv*>rH?DJXbH5y*KU82~H##+VEK;BK1k0+} zdIHAPYZ>0|1BX!f)YYaiwu@@W>C%Ji($nSkJIv|LO@A-M}`8T)_syvWkLDAe3EK)i51j??SdxA#m>7HP*f;xk1U@U1B-7_pwI`;&Q*2@`I z18!C#C3BCk_+M;#14(}3=?NCCoO^=B%I6HO0cWE%bB5Kxh(i6C{!o>{H9`&lw$&>r zsF!;M<>7=H!e2%o9TI` zI&*06f&D$kKx!b`vtj4qz0l}DUlJR!oylFh(g|}awI{k~SA42k-JRG~ TP3b8!wNd&yCP`!P?|=RaZ*}|= diff --git a/src/test/resources/test-input/SSR.parquet b/src/test/resources/test-input/SSR.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9e04cbdcaeda0e76af2b4bd4073780e7e7b953eb GIT binary patch literal 26038 zcmeI5duUwOnZV~tV_A`9*|Bve*NsSA$C-5#uPVus;<|3hyx*gF>b2=kMw-!MG@8*o z^cdyQ)GVQt5Xw?QDJ6tbLMS1Bn9?mFh>|VdQcBn4kB|)|gxyj~2qu)!4JDNQzH`r= zbMC!IvNSUb%kG9UntL9pZ)=ey^e?~EIBBt~VctjI%Qxl6WMEd7^zEf&k@2>drX zc15 z$>#T~BcmYgjBI1lCV@=KL7t+1gBnozp7SbM>IS0REB907t^NOT++r~)ZN*fDe1fvv zlU_-SJSks8=@!yfklqc_m2PM<6uf^$mDvs7$3{l*59Hz>$OkQga+Egk;qTrHsfF!G z+L;Zn#Y(keIK4Ep-Vce6^;CMJ=#M0G!Gxf-_nImx{d^T5u%=_5xMe>Tdf^{VKyEHp znqFHfd%_{#DR>NoN~J(Fuo`oOX15$Ufy5JB4ds-fyh+>z*-pjX`#AKZWi?-q9D~39 zt;M2OK(}H6#Ijw9@Y4R3Q}HYUCo$lz`gL=PNyGJrmw8 zKSJ?m{n$%8z6g#rPu;n=l=Z9$xZO}h>D3CDfKSLzQsD4U&q8iF>I$S5lXFq`bbQ(! 
zPb8BKOx{h;#8(Q-#Wla9GE-k!SzB5b$T}-1rkoWZO=>(M4^uVd7~0+yj06+dvc+{* zc4c9`A&}OqQrqjeh(y-x2IkH^^Ia) zAoY}?ymC*ov`N%e`8pLfR2+a@|NKUHKDSkKx@WVV29Cs(JF%S%3&ixOIm&>(j!6XX zk@_GPPm|vJA`(*Po?SHq+-QX#ftezV^5Hv_ZNOSOmP=q z7=+wt-4!n`RaaKGXP0Wbv(Dho`Vx+%&3a)b=UVp(Y|yLaDTAWgCXq%L)rc1xg;>$q zXn0ZKRMek#M5gPZHBZ)2TM-C6!PQVs8Ooc)eO-3a7CyLxW_2fa3$t^DTE2D?o0|?Fg;~mR4eg^-5*6G{5PM5HFo_g_h$> z*t!zh;ON3x>UpnQ);LO>ZqmH*!fdS5_cM%7WuE7B=C-a*UeK{ylsH zZzOjdGo@6n;0}?uK``N1cO-X%vxRzMC$Sj`Z3sl2-8C0ePH-rbSbmt}o7hv$PNO{E zR?fe@P>ZkoD;vee_To}QWW%$9Vu}?RCPDn00RD{^Ac(OiUn3{FZI@1fO(~z7B z%})7aJB#t1sk&o1v?yw%psI4(Ska^aZ(tmj;b3?*xe={x#p>Dk?tF3CUm+9S6IgSX z7I(`2kU;jJsFu>l>X^hv-r8B;3^{^Z^ZuZ7 zrxXYZuNN@3)QNpEF1@dN=hHAq7#aXTwW-4g{!i* z=`YOtH_MUj&Dkk@htDKx4WDzfR!i+}#w&t`4d_cKrwkC2P`(>**a6e<3ddB%il6A- z*%Ksi?%g6^m#^zK71sH2*i~5P)osNC%9aSN=6zv2$hE_$Z(+bL{A)-6!zjJd4Rm38aX!#=(&kI+`yQs|K9@ohb~u~J+tyTj>5 zb=4(+OY_P}T^SRWUwq)e^iLJ1Yjd;lN*yO~B=0T;YMW~-bw@6r4T?mZHkMKP7~?>3 zWTb8^#>xSBA0*4`OIvk(KWx?_Gs&VWJ6($lT6BWThB2zIZW1>pPg0|udlGw~=tiMQ zSWLNt;rfiR zz_nt!(yNs;A^CSADq7u1F3rdCN#Beo)u`h6n#)j--VP_=Jz~k=-iqQtGecD3E$dOz7kp6POrFDa;4moz%1QRTp1J< zHmS@XM&V4`*$g)lyS}6^y;OJNJFgO03sf4bm1jIN>=W0IXl%bpn&wFaz2RX9YWZ8>8&nG$%?((fEJ!%;f$}b10K`y${ z2$jZMFj4o-G-5UH&Z=}~Jh0-7Z@V1p@sMw!AUa^)zD`~RuhQnp=l|vE{GJ>;BbKeN z=1cX=$e8X)6jSYEWfk__yhJjFY)41cQz7*KaDKGk8B(v72O(lw11hiqiSH*P8X~FW za0=0*@ZUbiQy2p#eSg8ORbPyFD>XMzXDG;3SY!019S2(UdAd%Zy9~P;4eSmB0cT@?Kik zKb6d&%B+XA97j53$kCcehXgcpO2cW2gZNCutT-me64S=gsFR>HCc8aJV`;a?2`y-j zL`@_ndjnA{&6^vI0Y=fAn;JtSpQJ_z#xnky=Gf|IscIsRbwphmmp`Clwff#zCgyQz zR86r6G$WQEy@fMJOZKF~8Hd+vEbVtVq0y$&?pRoq><9-$$*!rWFgcbIB_}*hHs8dk z6U^^}fr^QzsWC9om2@N$YVs*Q+2x3+se?2%;lW`QO9V41e@x#4zDP>*5xh*#tP>}c zz6&ui>I@na{VY+_E*!uHx&t1I+y_c)+9h362~P%f#tzVWiwyVX=pw`21x?U7D%9;9 zwnoh%pKdT04&*en7I|bkaQO}w@k3aZ%ks>^(2pePh#|9*s}lp?Nb4cen2qk?fNW&D zr^vuVsm9vaX9*9uArX4o0iEyAdrs29Iy+!Z(yKH}{lmE?wWT;PG8soKmPw|YL-;JQ zpPB`;JQEIh(%Pu!nV1)Iq%xxiWsYf?GDpRp3`A2N)w7@pzQEM*IGSR)sbC+R#Y?ZrkC5C@2 z7W;1`4o{an1pkI)`b+MK2y>Ez({^bY({B|GQ-SNP<#Gxa*}z 
zfnuNanE7;xNBHkbwhtuxXFbS2_zI)|{w~+sn)0~DgJmUw|D$C4fnt~W}PqKe<53>muEAk(cz0q@(V#XQZv zf01lAy6oRRjl4r%T@*<9&Ka>3&EUV4Z0}0;@0>v?2VGSZpg%mz(9HVH1wS#$o@Bcv z**`ps`2DUZ3ec~@-2%-OXlQQ3K+vjVcvS|+faeOrsH*_|rj4PsZlqfdf_+J{y)4<^ zv;mggR`bl>$wNf6zZi# z0p+g(3fmq>ynrZyKh2Pef3+H2>EcA2?bL29x+Ryn|aPadyk-$yI&g=2zdRn zSO6Rg5CI=awm(Vs*Ds@hL);G(NO*aeO?YkK&=UqCL7nI?4kF^1P2l5q65$B=&4yMF@u z>*Hn<)6*8t#IKKoiT4d(3$VYKP?g37vt|?ZyoOMp1sD2@3DlXtB*-tE_a<4ZwXUZ} zJB0AOWP4Gv?@c0uT?Q0r@|(w)CK`wy1`!ah;D7U&iqUQg@{0WOs#y_wYDDU*Ym#4H zMZ$iU00nA&=b9NyPn`(%Rmt`Stfa0X)_na>0Q~t^%|LoyMS$u%{pVjr;K7&u1n`%> zW`@&KFAo0F*ARa2)jk3I)2|EQ>e(5A-;```O7>5`j&R|PJ^}NM$IVQ7@J5)bZ+hc# zWFF$`oaUGX^*vAQDvH^okQE&QTVWKXoyzcFxv3G2y^JbsZ7>D>nmcCv^l2 zhj1MCSMDQnf9GLb;YlP1|DETE@K3zvZlq_K1pB6BdmDC*UhKwwF8c|(?xvrq+;j=* zQ7EVEP5%*kY`=vwmTnKh@^Auf)OB7=UDb$eu)fxR+oW zleIf{#tjG91g*YE_7bSF`L>`y?T?d8Z4RoPs1vB_+&)eoaVS5RX8IbB&FPsu!K)ts zxiso5ygevT`_1Y7+4Ml4u+_!go72cP-XIiUKb+YgOOK5SR(-vGID^=O?+prs?9DNg z8|{KV8o~*j+4tta>_=IQy)(am8)!g+;MEZ>FDFW*1`=9_~8>}wf@<+Y{}6#`X{5MRq6)NplB zK>YQSjK~Kzv_K3&s{ZfSPa<;v8-oJ+_ZJwQH$yX&L-6Wu`uhur7vB~Xpx<3Q7(GVN zFG{wT;J|kg(fpl30r2A-1M*=Q7Ca(EI9d3;WdArvY;l)+f&wYGm)OKPAP7Z7slNO6 zQqyPQX^;(^fVw;*cLxO`?kqDAywOFAvOCL7I6@g5pF|G)#wxQCKVGyz7*U+QFIS1x z_q|0ZQ2lm+sjj|&M4>byLv`r43!wioP*r%jP$1#=Yhnp#9E|^|0XI&<@7IoKq0fp; z0zZyWf{FfF@rd*nO3ePe2jD%s`Z9W>?x*LdsA#-zw)x8T{#r6{@CDHG+fNF{@5Ta!5u>Z z>*c4|1mwcX>dCF@Q(t}xrX9Olh;ENPiQL{>PYan|qNB=u>uF%J`-RA4Pa@{~&#*Rg zp>BeF*7m$)fBzY5wRYK1(9#c|$ zk~mze6o=t%Vr4U5AFh_Shu8DO#G-k-u>5LakoiT&!QXd|_!dU(D4m zT>HDHE|jwCm_=g7m82_YUeLs(7+e2J@w2pmzw|@|8E_DcO+Tu z)lxmboX3Pi%O_!eMu5g1sR4K?hF!#Ln?u9;?Q_%!en&VEUG4}6;>R7~(A%dSV3A0C zM>w6a5xEfC5q&ykBQAo@^g8k^qmzWj%-aXiC^-YQIBRJ*ydxZle7DUp#JL%-Rf`)U z+YH#E!B}9sZ4`@cw~gW=+3mB`kab5m5T$OLLxa&B;XvrQZ4M1CcZ35mUB+qAI5fE1HiyP`+vd<1Z`&Li)NPwXW4Uc} zXxO%WjvAqDn?s|rZFBHI2u?y6r`7S?JlN|H3gWBnlXSoBXdg#os0`HNq@_O~VVqWt zPJeXL2@r{lwhwB?Lfc1a(N9Lol{aw%eCWL>T_vmGO@?Wu*VGUp4a{@`MS$#AIt8LaiDYJp3}YF9BDs${^{l_Sli;mMBi=;?LGc<2!!1j%^q8juDZ8L%Y->Ozbi 
zVKw849ih=MVnl_-KYXhME46$_p4b8O!Ad$J& z0W>hzI)KLIS_jbS)H;Ah-&zN7zwTrZh{GIV^~ez3RuSyCql|FWnZZJ8tuyGaQyJjM zevSU3^~i*8JY!LUG#>W1>(%pH`D$&oTsl8GJUTKwa{ki7=4vr_e&ot%c5-q8{(al8 zPLAFOND (CRPCEN);0;0;13;0;;0;;1;Z;23;0;13;;;0 +6110;99;ASN;IJ NORMALES + 6MOIS;1;9;99;0;;0;;1;Z;23;0;13;;fermé le 30/06/2008;0 +6111;4;NOR-/REN-/MIN-;IJ NORMALES -3 MOIS;1;0;13;0;;0;;1;Z;;0;13;;;0 +6112;4;NOR+/MIN+;IJ NORMALES +3MOIS;1;0;13;0;;0;;1;Z;;0;13;;;0 +6113;4;1/1-;IJ REDUITES -3MOIS;1;0;13;0;;0;;1;Z;;0;13;;;0 +6114;4;1/1+;IJ REDUITES +3MOIS;1;0;13;0;;0;;1;Z;;0;13;;;0 +6115;4;MAJ-/REN+;IJ MAJOREES -3MOIS;1;0;13;0;;0;;1;Z;;0;13;;;0 +6116;4;MAJ+/MIJ-/MIJ+;IJ MAJOREES +3MOIS;1;0;13;0;;0;;1;Z;;0;13;;;0 +6117;4;MIT-;IJ PARTIELLE, PERTE DE SALAIRE -3MOIS;1;0;13;0;;0;;1;Z;;0;13;;;0 +6118;4;MIT+;IJ PARTIELLE, PERTE DE SALAIRE +3MOIS;1;0;13;0;;0;;1;Z;;0;13;;;0 +6119;99;ASM;IJ MAJOREES + 6 MOIS;1;9;99;0;;0;;1;Z;23;0;13;;fermé le 30/06/2008;0 +6120;4;ITI;INDEMNITE TEMPORAIRE D'INAPTITUDE AT/MP;1;0;13;0;;0;;0;Z;23;0;13;;;0 +6121;4;PRE;IJ PRENATALES;0;0;13;0;;0;;0;Z;23;0;13;;;0 +6122;4;POS;IJ POSTNATALES;0;0;13;0;;0;;0;Z;23;0;13;;;0 +6123;4;ADO;IJ EN CAS D ADOPTION;0;0;13;0;;0;;0;Z;23;0;13;;;0 +6124;4;ISM;IJ CONGE SUPPLEMENTAIRE PREMA;0;0;13;0;;0;;0;Z;23;0;13;;;0 +6125;99;IRT+/IRT-;INDEMNITE REMPLACEMENT CONJOINTS COLLABORATEURS TI;0;9;99;0;;0;;0;Z;23;0;13;;;0 +6126;99;FGP;FORFAIT GROSSESSE TAUX PLEIN TI;0;9;99;0;;0;;0;Z;21;0;13;;;0 +6127;99;FGR;FORFAIT GROSSESSE TAUX REDUIT TI;0;9;99;0;;0;;0;Z;21;0;13;;;0 +6128;99;FAP;FORFAIT ADOPTION TAUX PLEIN TI;0;9;99;0;;0;;0;Z;21;0;13;;;0 +6129;99;FAR;FORFAIT ADOPTION TAUX REDUIT TI;0;9;99;0;;0;;0;Z;21;0;13;;;0 +6131;4;CUN-;IJ NORMALES POUR CURE THERMALE;1;0;13;0;;0;;1;Z;;0;24;;;0 +6132;4;CUM-;IJ MAJOREES POUR CURE THERMALE;1;0;13;0;;0;;1;Z;;0;24;;;0 +6133;4;CUR-;IJ REDUITES POUR CURE THERMALE;1;0;13;0;;0;;1;Z;;0;24;;;0 +6134;99;AAM-;IJ MALADIE PAMC MOINS DE 3 MOIS;0;9;99;0;;0;;0;Z;23;0;0;;;0 +6135;99;AAM+;IJ MALADIE PAMC PLUS DE 3 
MOIS;0;9;99;0;;0;;0;Z;23;0;0;;;0 +6191;99;IDN;INDEMNITE DE NOURRITURE;0;9;99;0;;0;;0;Z;23;0;13;;;0 +6211;99;AFE+/AFE-;ALLOCATION FEMME ENCEINTE;0;9;99;0;;0;;0;Z;23;0;0;;;0 +6212;4;PER;IJ CONGE MATERNITE AU PERE;0;0;13;0;;0;;0;Z;23;0;13;;;0 +6213;4;;INDEMNITE DE REMPLACEMENT PATERNITE;0;0;13;0;;0;;0;Z;;0;13;;Pas d information sur la prestation;0 +6214;99;ISP-;IJ CONGE SUPPLEMENTAIRE MATERNITE PAMC;0;9;99;0;;0;;0;Z;23;0;0;;;0 +6215;99;IRN+/IRN-;IJ CONGE POSTNATAL PAMC;0;9;99;0;;0;;0;Z;23;0;0;;;0 +6221;4;ARM;ALLOCATION REPOS MATERNEL NORMAL;0;0;13;0;;0;;0;Z;21;0;13;;;0 +6222;4;ARA;ALLOCATION REPOS MATERNEL ADOPTION;0;0;13;0;;0;;0;Z;21;0;13;;;0 +6231;99;IRM+/IRM-;IJ CONGE PRENATAL PAMC;0;9;99;0;;0;;0;Z;23;0;13;;;0 +6232;99;IRA-/IRA+;IJ CONGE ADOPTION PAMC;0;9;99;0;;0;;0;Z;23;0;13;;;0 +6233;99;IRP-;IJ CONGE PATHOLOGIQUE PAMC;0;9;99;0;;0;;0;Z;23;0;13;;;0 +6234;4;IRG;INDEMNITE MATERNITE EN CAS DE NAISSANCES MULTIPLES;0;0;13;0;;0;;0;Z;21;0;13;;;0 +6235;4;IRC;INDEMNITE DE REMPLACEMENT CONJOINTE COLLABORATRICE;0;0;13;0;;0;;0;Z;21;0;13;;;0 +6236;99;IPA-;IJ PATERNITE PAMC;0;9;99;0;;0;;0;Z;23;0;13;;;0 +6237;4;IPC;INDEMNITE PATERNITE CONJOINT PAMC;0;0;13;0;;0;;0;Z;21;0;13;;;0 +6238;4;IPI;INDEMNITES PATERNITE CONJOINT INFIRMIER;0;0;13;0;;0;;0;Z;21;0;13;;;0 +6239;99;IDA;INDEMNITÉ MALADIE DOUBLE ACTIVITÉ PAMC;1;9;99;0;;0;;0;Z;21;0;0;;;0 +6241;4;AVP;ALLOCATION ACCOMPAGNEMENT FIN DE VIE CESSATION ACTIVITE TEMPS PLEIN;0;0;13;0;;0;;0;Z;23;0;13;;;0 +6242;4;AVR;ALLOCATION ACCOMPAGNEMENT FIN DE VIE CESSATION ACTIVITE REDUITE;0;0;13;0;;0;;0;Z;23;0;13;;;0 +6251;4;NNO-;ALLOCATION NUIT NORMALE - 3 MOIS;0;0;13;0;;0;;0;Z;;0;13;;;0 +6252;4;NNO+;ALLOCATION NUIT NORMALE + 3 MOIS;0;0;13;0;;0;;0;Z;;0;13;;;0 +6253;4;NME-;ALLOCATION NUIT MAJOREE 3 ENFANTS - 3 MOIS;0;0;13;0;;0;;0;Z;;0;13;;;0 +6262;4;EME+;ALLOCATION EXPOSITION MAJOREE 3 ENFANTS + 3 MOIS;0;0;13;0;;0;;0;Z;;0;13;;;0 +6263;4;EMN;ALLOCATION EXPOSITION ARRET + 6 MOIS;0;0;13;0;;0;;0;Z;23;0;13;;fermé le 30/12/2006;0 +6264;4;EEN;ALLOCATION 
EXPOSITION ARRET + 6 MOIS ET 3 ENFANTS;0;0;13;0;;0;;0;Z;23;0;13;;fermé le 30/12/2006;0 +6311;4;;RDS IJ MATERNITE;0;0;13;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +6312;4;;RDS ALLOC MATERNITE;0;0;13;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +7111;8;PI/RPI;PENSION INVALIDITE AVANTAGES DE BASE;0;0;15;0;;0;;0;Z;;0;0;;;0 +7112;8;FN/RFN;PENSION INVALIDITE ALLOCATIONS SUPPLEMENTAIRES;0;0;15;0;;0;;0;Z;;0;0;;;0 +7113;8;TP/RTP;PENSION INVALIDITE MAJORATIONS POUR ASSISTANCE D UNE TIERCE PERSONNE;0;0;15;0;;0;;0;Z;;0;0;;;0 +7119;8;;PENSIONS D INVALIDITE SERVIES PAR LE REGIME SPECIAL DE SECURITE SOCIALE DANS LES MINES;0;0;15;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +8111;10;RVI;RENTES DE VICTIME;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8112;10;MTP;MAJORATIONS POUR ASSISTANCE D UNE TIERCE PERSONNE;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8113;10;RCS;RENTES DE CONJOINT SURVIVANT;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8114;10;REV;RENTES DE REVERSION;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8115;10;ROR;RENTES D ORPHELIN;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8116;10;RAS;RENTES D ASCENDANT;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8117;10;FIA;MAJORATION FAUTE INEXCUSABLE RENTE ASCENDANT;0;0;17;1;25;0;;0;Z;18;0;0;;;0 +8118;10;FIR;MAJORATION FAUTE INEXCUSABLE RENTE VICTIME;0;0;17;1;25;0;;0;Z;18;0;0;;;0 +8119;10;FIC;MAJORATION FAUTE INEXCUSABLE RENTE CONJOINT;0;0;17;1;25;0;;0;Z;18;0;0;;;0 +8120;10;FIO;MAJORATION FAUTE INEXCUSABLE RENTE ORPHELIN;0;0;17;1;25;0;;0;Z;18;0;0;;;0 +8121;99;PTP;PRESTATION COMPLEMENTAIRE POUR RECOURS A TIERCE PERSONNE;0;9;99;0;;0;;0;Z;18;0;0;;;0 +8221;10;MCR;MAJORATIONS CRISTALLISEES;0;0;17;0;;0;;0;Z;18;0;0;;fermé le 31/12/2006;0 +8222;10;ROB;RACHAT OBLIGATOIRE;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8223;10;RFT;RACHAT FACULTATIF TOTAL;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8224;10;RFP;RACHAT FACULTATIF PARTIEL;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8225;10;;TRANSFERT DE CAPITAUX;0;0;17;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +8226;10;ICA;INDEMNITE EN CAPITAL ACCIDENT DU TRAVAIL;0;0;17;0;;0;;0;Z;18;0;0;;;0 +8227;10;FII;MAJORATION 
FAUTE INEXCUSABLE INDEMNITE EN CAPITAL;0;0;17;1;25;0;;0;Z;18;0;0;;;0 +9111;12;P01;COMPLEMENT TICKET MODERATEUR;0;0;19;0;;0;;1;Z;21;0;39;;;0 +9112;12;P02;COMPLEMENT FRAIS DE TRANSPORT ET DE SEJOUR;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9113;12;P03;FRAIS OCCASIONNE PAR LE DON D ORGANES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9114;12;P04;COMPLEMENT AUX FRAIS D HOSPITALISATION DE LA MERE QUI ALLAITE SON ENFANT;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9115;12;P05;PERTE DE SALAIRE POUR ENFANT MALADE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9116;12;P06;INDEMNITES JOURNALIERES MATERNITE POUR CERTAINES CATEGORIES D ASSURES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9118;12;P08;FRAIS FUNERAIRES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9119;12;P10;COMPLEMENT POUR CURE THERMALE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9121;12;P11;COMPLEMENT TICKET MODERATEUR POUR ENFANT DE MOINS D UN AN;0;0;19;0;;0;;1;Z;21;0;39;;;0 +9122;12;P12;ALLOCATION DECES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9123;12;P13;COMPLEMENT MALADIES CHRONIQUES ET MAINTIEN A DOMICILE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9129;12;P09;INDEMNITES COMPLEMENTAIRES EN REEDUCATION PROFESSIONNELLE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9131;12;PSD/IPS;INDEMNITE DE PERTE DE SALAIRE Y COMPRIS DIALYSE A DOMICILE;0;0;19;0;;0;;0;Z;;0;0;;;0 +9132;12;TTH;FRAIS DE DEPLACEMENT EN CURE THERMALE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9133;12;HTH;FRAIS D HEBERGEMENT EN CURE THERMALE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9134;12;PFR;PRIME DE FIN DE REEDUCATION PROFESSIONNELLE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9135;12;SEC;AIDE FINANCIERE EXCEPTIONNELLE (SECOURS);0;0;19;0;;0;;0;Z;;0;0;;fermé le 20/02/2004;0 +9141;12;VA;MAJORATION LIEE A UNE VISITE D URGENCE;0;0;19;0;;0;;1;Z;20;0;49;;fermé le 15/09/2014;0 +9142;12;KAU;MAJORATION LIEE A UN K D URGENCE;0;0;19;0;;0;;1;Z;;0;49;;;0 +9143;12;;SUPPLEMENT POUR SOINS AUX POLYTRAUMATISES;0;0;19;0;;0;;1;Z;;0;49;;;0 +9144;12;;HONORAIRES REMUNERANT LA PERMANENCE TELEPHONIQUE SUR LA BASE DE 3 C DE L HEURE (ASTREINTE);0;0;19;0;;0;;1;Z;;0;47;;Saisie manuelle Qualiflux;0 +9151;99;;PLAN SEGUIN;0;9;99;0;;0;;1;Z;;0;22;;fermé le 
20/05/2011;0 +9152;99;;PLAN EVIN;0;9;99;0;;0;;1;Z;;0;22;;fermé le 07/07/2011;0 +9161;12;;PRESTATIONS SUPPLEMENTAIRES ALSACE-MOSELLE;0;0;19;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +9162;12;SNM;SURVEILLANCE MEDICALE MATERNELLE EN ACTION SANITAIRE ET SOCIALE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9163;12;;CENTRE EXAMEN SANTE SAISIE MANUELLE;0;0;19;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +9164;12;;PRESTATIONS D ASS SNCF ET REGIME GENERAL;0;0;19;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9165;12;;PRESTATIONS D ASS SNCF ET REGIME GENERAL;0;0;19;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9166;12;;PRESTATIONS D ASS SNCF ET REGIME GENERAL;0;0;19;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9167;12;;PRESTATIONS D ASS SNCF ET REGIME GENERAL;0;0;19;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9168;12;;PRESTATIONS D ASS SNCF ET REGIME GENERAL;0;0;19;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9169;12;;PRESTATIONS D ASS SNCF ET REGIME GENERAL;0;0;19;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9170;12;;PRESTATIONS D ASS SNCF ET REGIME GENERAL;0;0;19;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9191;12;;TICKET MODERATEUR - PREVENTION BUCCO-DENTAIRE;0;0;19;0;;0;;1;Z;;0;22;;;0 +9201;99;EEP;ENTRETIEN EVALUATION PSYCHOLOGUE;0;9;99;0;;0;;1;C;20;0;1;;;0 +9202;99;APS;ACCOMPAGNEMENT PSYCHOLOGIQUE DE SOUTIEN;0;9;99;0;;0;;1;C;20;0;1;;;0 +9203;99;PSS;PSYCHOTHERAPIE STRUCTUREE;0;9;99;0;;0;;1;C;20;0;1;;;0 +9211;12;VCC/VAC;VACCIN (MILITAIRES) / VACCIN GRIPPE (CRPCEN);0;1;19;0;;0;;1;Z;21;0;5;;;0 +9221;13;TDF;TEST DE DEPISTAGE RAPIDE (FOURNISSEUR);0;0;20;0;;0;;0;Z;21;0;0;;;0 +9311;13;BD4;DEPISTAGE DU CANCER COLO-RECTAL;0;0;20;0;;0;;1;Z;21;0;5;;;0 +9312;13;;DEPISTAGE DU CANCER DU SEIN (PREVENTION MALADIE);0;0;20;0;;0;;1;Z;;0;20;;Saisie manuelle Qualiflux;0 +9313;13;BD3;FRAIS DE LECTURE POUR MAMMOGRAPHIE;0;0;20;0;;0;;0;Z;21;0;0;;;0 +9318;13;;ANALYSES DEPISTAGE CANCER UTERUS (PREVENTION MALADIE);0;0;20;0;;0;;1;Z;;0;20;;Saisie manuelle Qualiflux;0 +9319;13;;PRELEVEMENTS DEPISTAGE CANCER UTERUS (PREVENTION MALADIE);0;0;20;0;;0;;1;Z;;0;20;;Saisie manuelle 
Qualiflux;0 +9411;13;;CONSULTATION HYGIENE BUCCO-DENTAIRE;0;0;20;0;;0;;1;Z;;0;20;;Saisie manuelle Qualiflux;0 +9412;13;;HYGIENE BUCCO-DENTAIRE N91 (SCELLEMENT D UNE MOLAIRE);0;0;20;0;;0;;1;Z;;0;20;;Saisie manuelle Qualiflux;0 +6254;4;NME+;ALLOCATION NUIT MAJOREE 3 ENFANTS + 3 MOIS;0;0;13;0;;0;;0;Z;;0;13;;;0 +6255;4;NMN;ALLOCATION NUIT MAJOREE ARRET + 6 MOIS;0;0;13;0;;0;;0;Z;23;0;13;;fermé le 30/12/2006;0 +6256;4;NEN;ALLOCATION NUIT MAJOREE ARRET + 6 MOIS ET 3 ENFANTS;0;0;13;0;;0;;0;Z;23;0;13;;fermé le 30/12/2006;0 +6257;4;ENO-;ALLOCATION EXPOSITION NORMALE - 3 MOIS;0;0;13;0;;0;;0;Z;;0;13;;;0 +6258;4;ENO+;ALLOCATION EXPOSITION NORMALE + 3 MOIS;0;0;13;0;;0;;0;Z;;0;13;;;0 +6261;4;EME-;ALLOCATION EXPOSITION MAJOREE 3 ENFANTS - 3 MOIS;0;0;13;0;;0;;0;Z;;0;13;;;0 +9704;12;ASP;COMPLEMENT D ACTION SOCIALE PROTHESES DENTAIRES (CLERCS ET EMPLOYES DE NOTAIRES, PORT AUTONOME DE BORDEAUX);0;0;19;0;;0;;1;Z;20;0;38;;;0 +9705;12;SSP;COMPLEMENT D ACTION SOCIALE DENTAIRE ALSACE MOSELLE (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;0;Z;21;0;0;;;0 +9706;12;SOL;COMPLEMENT D ACTION SOCIALE OPTIQUE (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;1;Z;21;0;19;;;0 +9707;12;SOE;COMPLEMENT D ACTION SOCIALE OPTIQUE, ENFANT DE MOINS DE 16 ANS (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;1;Z;21;0;19;;;0 +9708;12;SOM;COMPLEMENT D ACTION SOCIALE MONTURES (CLERCS ET EMPLOYES DE NOTAIRES, PORT AUTONOME DE BORDEAUX);0;0;19;0;;0;;1;Z;21;0;19;;;0 +9709;12;SOV;COMPLEMENT D ACTION SOCIALE VERRES (CLERCS ET EMPLOYES DE NOTAIRES, PORT AUTONOME DE BORDEAUX);0;0;19;0;;0;;1;Z;21;0;19;;;0 +9710;12;;COMPLEMENT D ACTION SOCIALE LENTILLES (CLERCS ET EMPLOYES DE NOTAIRES, PORT AUTONOME DE BORDEAUX);0;0;19;0;;0;;1;Z;;0;19;;Autre regime NEC;0 +9711;12;LYA/LYB/LYJ/LYT;PRIME DE LAYETTE (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;0;Z;;0;0;;;0 +9712;12;VOY;ALLOCATION VOYAGE DES ENFANTS A LA COLONIE DU PRARIAND (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;0;Z;21;0;0;;fermé le 31/12/2008;0 +9713;12;COL;AIDE AUX SEJOURS 
DES ENFANTS (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;0;Z;21;0;0;;;0 +9714;12;FAM;ALLOCATION VACANCES (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;0;Z;21;0;0;;;0 +9715;12;SSO;AIDES FINANCIERES INDIVIDUELLES OPTIQUE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9716;12;SSD;ACTION SOCIALE DENTAIRE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9717;12;SSA;ACTION SOCIALE AUDITIF;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9718;12;CCT;ACTION SOCIALE CURE THERMALE;0;0;19;0;;0;;0;Z;21;0;0;;fermé le 01/01/2000;0 +9719;12;SEJ;AIDES FINANCIERES INDIVIDUELLES HOSPITALISATION;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9721;12;DIF;AIDES FINANCIERES INDIVIDUELLES DIFFICULTES FINANCIERES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9722;12;AAF;AIDES FINANCIERES INDIVIDUELLES AUTRES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9723;12;RET;VACANCES RETRAITES (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;0;Z;;0;0;;fermé le 31/12/2008;0 +9724;12;SCO;AIDE A LA SCOLARITE (CRPCEN);0;0;19;0;;0;;0;Z;21;0;0;;;0 +9725;12;CES;ACCUEIL JEUNE ENFANT (CRPCEN);0;0;19;0;;0;;0;Z;21;0;0;;;0 +9726;99;SOP;PRESTATIONS SUPPLÉMENTAIRES OPTIQUES CRPCEN;0;9;99;0;;0;;0;Z;21;0;0;;;0 +9727;99;SAU;PRESTATIONS SUPPLÉMENTAIRES ACOUSTIQUES CRPCEN;0;9;99;0;;0;;0;Z;21;0;0;;;0 +9731;12;AMU;COMPLEMENT A L'ACS;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9732;12;AFI;AIDE A L'ACQUISITION D'UNE COUVERTURE COMPLEMENTAIRE POUR LES VICTIMES DU SEUIL ACS;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9741;12;HLO;AIDES AU LOGEMENT;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9742;12;HCO;AIDE A LA COMMUNICATION HORS AUDITIF;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9840;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9841;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9842;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9843;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9844;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9845;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 
+9846;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9847;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9848;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9849;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9850;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9851;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9852;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9853;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9854;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9855;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9856;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9857;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9858;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9859;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9860;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9861;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9862;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9863;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9864;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9865;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9866;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9867;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9868;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique 
NTEIR;0 +9869;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9870;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9871;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9872;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9873;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9874;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9769;12;DIN;AIDES FINANCIERES A CARACTERE SOCIAL PALLIANT L'ABSENCE DE REVENUS;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9771;12;HPA;AIDES PROTHESES AUDITIVES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9772;12;HAI;AIDES MENAGERES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9773;12;ASH;AIDES MENAGERES SORTIE D'HOSPITALISATION COORDONNEE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9774;12;PSH;ACTES FOURNITURES SORTIE D'HOSPITALISATION COORDONNEE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9775;12;PAD;AIDES MENAGERES PROGRAMME D'ACCOMPAGNEMENT APRES INTERVENTION ORTHOPEDIQUE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9776;12;PFD;ACTES FOURNITURES PROGRAMME D'ACCOMPAGNEMENT APRES INTERVENTION ORTHOPEDIQUE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9777;12;PPL;PHARMACIE NON REMBOURSABLE SOINS PALLIATIFS;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9778;12;BCP;BILANS DE COMPETENCES ET REORIENTATION PROFESSIONNELLE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9801;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9802;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9803;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9804;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +1407;99;CT1;COURONNE TRANSITOIRE RAC MODERE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1408;99;CZ1;COURONNE ZIRCONE RAC MODERE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1409;99;IC1;INLAY CORE RAC MODERE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1410;99;IN1;INLAY ONLAY RAC MOERE;1;9;99;0;;1;;1;T;20;1;42;;;0 
+1412;99;PA1;PROTHESE AMOVIBLE RAC MODERE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1413;99;RE1;REPARATION PROTHESE RAC MODERE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1414;99;SU1;SUPPLEMENT PROTHESE METALLIQUE RAC MODERE;1;9;99;0;;1;;1;T;20;1;42;;;0 +9875;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9876;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9877;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9878;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9879;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9880;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9881;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9882;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9883;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9884;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9885;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9886;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9887;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9888;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9889;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9890;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9891;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9892;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9893;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9894;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9895;0;;CODES MIS A LA 
DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9896;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9897;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9898;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9899;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9901;12;TRA;TRANSPORT POUR PERSONNE ACCOMPAGNANTE (MILITAIRES);0;0;19;0;;0;;0;Z;21;0;0;;;0 +9902;12;AFA;AIDE SOCIALE (MILITAIRES);0;0;19;0;;0;;0;Z;21;0;0;;;0 +9911;12;AIM;AIDE MENAGERE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9912;12;AFM;AIDE MENAGERE FAMILIALE (MILITAIRES);0;0;19;0;;0;;0;Z;21;0;0;;;0 +9999;99;;VALEUR INCONNUE;9;9;99;0;;0;;0;I;;0;0;;;1 +1195;99;A51;FOND INNOVATION - PAIEMENT;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1404;99;RA0;REPARATION ADJONCTION RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1405;99;RS0;REPARATION PROTHESE ADJOINTE SIMPLE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1406;99;SU0;SUPPLEMENT PROTHESE RESINE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +2386;5;PY6;FORFAIT PSYCHIATRIE SEANCE COLL, 2 INTERVENANT 6 à 8H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2387;5;PY7;FORFAIT PSYCHIATRIE SEANCE IND. 
2 INTERVENANTS 6 à 8H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2388;5;PY8;FORFAIT PSYCHIATRIE DE SECURITE HOSPITALISATION SANS HEBERGEMENT;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2389;5;PY9;PRISE EN CHARGE DE NUIT POUR UNE DUREE ENTRE 8 ET 12H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2391;5;FR2;FORFAIT TECHNIQUE TARIF REDUIT N°2;1;1;10;0;;0;1;0;Z;;0;0;;;0 +2392;5;FR3;FORFAIT TECHNIQUE TARIF REDUIT N°3;1;1;10;0;;0;1;0;Z;;0;0;;;0 +2411;11;IG;INTERVENTION IVG;1;0;10;0;;0;;1;T;20;0;18;1;;0 +2412;11;IGA;ANESTHESIE GENERALE;1;0;10;0;;0;;1;T;20;0;18;1;;0 +2413;11;IGB;INVESTIGATIONS BIOLOGIQUES;1;0;10;0;;0;;1;Z;20;0;18;;;0 +2414;11;IC;CONSULTATION IVG;1;0;10;0;;0;;1;C;20;0;18;1;;0 +2415;11;IGM;MEDICAMENTS: MIFEYGINE;1;0;10;0;;0;;1;Z;21;0;5;;;0 +2416;11;IGP;MEDICAMENTS: PROSTAGLANDINES;1;0;10;0;;0;;1;Z;21;0;5;;;0 +2417;11;IVB;VERIFICATION BIOLOGIQUE;1;0;10;0;;0;;1;Z;20;0;18;;;0 +2418;11;IVE;VERIFICATION ECHOGRAPHIQUE;1;0;10;0;;0;;1;Z;20;0;18;1;;0 +2419;11;IMD;FORFAIT INTERVENTION AMBULATOIRE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2420;11;IMI;FORFAIT INTERVENTION DUREE < OU = 12 H PRIVE MEDIC;0;0;10;0;;0;;1;Z;22;0;18;;;0 +2421;11;AMD;INTERVENTION + ANESTHESIE AMBULATOIRE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2422;11;AMF;FORFAIT POUR IVG MEDICAMENTEUSE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2423;11;IPD;FORFAIT INTERVENTION AVEC NUITEE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2424;11;APD;INTERVENTION + ANESTHESIE AVEC NUITEE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2425;99;FJS;FORFAIT IVG POUR 24H SUPPLEMENTAIRES - SECTEUR PRIVE/SECTEUR PUBLIC;0;9;99;0;;0;;0;Z;22;0;0;;fermé le 30/03/2013;0 +2426;11;ICS;CONSULTATION IVG SPECIALISTE;1;0;10;0;;0;;1;C;20;0;18;;;0 +2428;99;IPE;ECHO PRE IVG;1;9;10;0;;0;;1;Z;20;0;18;;;0 +2501;5;FI1;FORFAIT FIR PDSES PUBLIQUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2502;5;FI2;FORFAIT FIR CENTRE DEPISTAGE ANONYME ET GRATUIT;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2503;5;FI3;FORFAIT FIR CENTRE PERINATAUX DE PROXIMITE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2504;5;FI4;FORFAIT FIR EDUCATION THERAPEUTIQUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +3101;99;FSA;FORFAIT STRUCTURE PS 
AUXILIAIRES;1;9;99;0;;0;;1;Z;21;0;47;;;0 +3102;99;FSB;CONTESTATION FORFAIT STRUCTURE PS AUXILIAIRES;1;9;99;0;;0;;1;Z;21;0;47;;;0 +3110;99;CII;CONTRAT INCITATIF INFIRMIER;1;9;99;0;;0;;1;Z;20;0;47;;;0 +3111;2;AMI;ACTES EN AMI;1;1;;0;;1;1;1;Z;20;1;32;;;0 +3112;2;AIS;ACTES INFIRMIERS DE SOINS (AMI3-AMI13,AMI16);1;0;3;0;;1;1;1;Z;20;1;3;;;0 +3113;2;SFI;ACTES INFIRMIERS DES SAGES-FEMMES (SFI);1;1;;0;;1;1;1;Z;20;1;32;;;0 +3114;2;PSI;PLAN DE SOINS INFIRMIER;1;0;3;0;;0;;0;Z;;1;0;;fermé le 01/10/1998;0 +3115;2;DI;DEMARCHE INFIRMIER;1;0;3;0;;1;;1;Z;20;1;3;;;0 +3116;2;MAU;MAJORATION POUR ACTE UNIQUE;1;0;3;1;32;0;1;1;Z;20;0;70;;;0 +3117;2;MCI;MAJORATION DE COORDINATION ET D'ENVIRONNEMENT DE SOIN INFIRMIER;1;0;3;1;32;0;1;1;Z;20;0;70;;;0 +3118;2;VGI;REMUNERATION VACCINATION GRIPPE A PAR INFIRMIERS LIBERAUX;1;0;3;0;;0;;1;Z;20;0;47;;;0 +3119;2;VIR;REMUNERATION VACCINATION GRIPPE A PAR INFIRMIER RETRAITE OU SALARIE HORS OBLIGATIONS;1;0;3;0;;0;;0;Z;;0;0;;;0 +3121;2;AMC;ACTES AMC;1;0;3;0;;1;1;1;Z;20;1;3;;;0 +3122;2;AMK;ACTES EN AMK;1;0;3;0;;1;1;1;Z;20;1;3;;;0 +3124;2;AMB;BILAN DE KINESITHERAPIE;1;0;3;0;;0;;0;Z;;1;0;;fermé le 01/10/1998;0 +3125;2;AMS;ACTES DE KINESITHERAPIE OSTEO-ARTICULAIRE;1;0;3;0;;1;1;1;Z;20;1;3;;;0 +3126;99;CIK;CONTRAT DEMOGRAPHIQUE KINE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +3127;99;FRD;FORFAIT PRISE EN CHARGE AVC;1;9;99;0;;1;;1;C;20;0;46;1;;0 +3128;99;FAD;RETOUR A DOMICILE POST CHIRURGIE ORTHOPEDIQUE;1;9;99;0;;1;;1;C;20;0;46;1;;0 +3131;2;AMP;ACTES DES PEDICURES;1;0;3;0;;1;1;1;Z;20;1;3;;;0 +3132;2;AMO;ACTES DES ORTHOPHONISTES;1;0;3;0;;1;;1;Z;20;1;3;;;0 +3133;2;AMY;ACTES DES ORTHOPTISTES;1;0;3;0;;1;;1;Z;20;1;3;;;0 +3134;1;POD;ACTE DE PEDICURE-PODOLOGUE (DIABETIQUE);1;0;3;0;;1;1;1;Z;20;0;3;;;0 +3135;99;FOT;FORFAIT EVALUATION DOMICILE HANDICAP;1;9;99;1;32;0;;1;Z;20;0;70;;;0 +3139;99;CIO;CONTRAT INCITATIF ORTHOPHONISTE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +3211;2;B;ACTES DE BIOLOGIE;1;1;;0;;1;1;1;Z;20;0;29;;;0 +3212;2;BP;ACTES D ANATOMO-CYTO-PATHOLOGIE EN 
LABORATOIRE;1;0;4;0;;0;;1;Z;;0;29;;fermé le 31/12/2001;0 +3213;99;FPB;FORFAIT PREALABLE BIOLOGIE IVG VILLE;1;9;99;0;;0;;1;Z;20;0;18;0;;0 +3214;2;BR;ACTES EN BR;1;0;4;0;;0;;1;Z;;0;29;;fermé le 31/12/2001;0 +3215;2;ADU;ANALYSE DEPISTAGE CANCER DE L'UTERUS;1;0;4;0;;0;;1;Z;;0;4;;fermé le 01/01/1999;0 +3216;99;FUB;FORFAIT ULTERIEUR BIOLOGIE IVG VILLE;1;9;99;0;;0;;1;Z;20;0;18;0;;0 +3221;2;KB;PRELEVEMENT AUTRE QUE SANGUIN PAR UN DIRECTEUR DE LABORATOIRE;1;0;4;0;;1;;1;Z;20;0;4;;;0 +3222;2;PB;PRELEVEMENT SANGUIN PAR UN DIRECTEUR DE LABORATOIRE;1;1;;0;;1;1;1;Z;20;0;4;;;0 +3223;2;TB;PRELEVEMENT SANGUIN PAR UN TECHNICIEN DE LABORATOIRE;1;1;;0;;1;1;1;Z;20;0;4;;;0 +3224;2;KDU;PRELEVEMENT DEPISTAGE CANCER DE L'UTERUS;1;0;4;0;;0;;1;Z;;0;20;;fermé le 01/01/1999;0 +3225;2;KMB;PRELEVEMENT PAR PONCTION VEINEUSE DIRECTE POUR UN MEDECIN BIOLOGISTE;1;0;4;0;;1;1;1;T;20;0;4;;;0 +3300;99;RRA;REMUNERATION ROSP AOD;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3301;99;CRA;CONTESTATION ROSP AOD;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3302;99;PGC;PAIEMENT GARANTIE CONVENTIONNELLE PHARMACIE;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3303;99;CGC;CONTESTATION GARANTIE CONVENTIONNELLE PHAMACIE;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3304;99;PQS;PAIEMENT QUALITE DE SERVICE PHARMACIE;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3305;99;CQS;CONTESTATION QUALITE DE SERVICE PHARMACIE;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3306;99;BMR;BILAN MEDICATION REMUNERATION PHARMACIE;1;9;99;0;;0;;0;Z;21;0;12;;;0 +3307;99;BMC;BILAN MEDICATION CONTESTATION PHARMACIE;1;9;99;0;;0;;0;Z;21;0;12;;;0 +3311;2;PH1;PHARMACIE 100%;1;0;5;0;;1;;1;Z;21;0;28;;;0 +3312;2;PH4/PG4;PHARMACIE PH4;1;0;5;0;;1;;1;Z;;0;28;;;0 +3313;2;PH7/PG7;PHARMACIE 65%;1;1;;0;;1;;1;Z;;0;28;;PG7 : fermé le 01/10/2007;0 +2369;5;N15;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 15;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2371;5;G1;TARIF SOINS GIR 1 ET 2;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2372;5;G2;TARIF SOINS GIR 3 ET 4;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2373;5;G3;TARIF SOINS GIR 5 ET 6;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2380;5;PY0;FORFAIT PSYCHIATRIE SEANCE COLL, 1 
INTERVENANT 3 à 4H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2381;5;PY1;FORFAIT PSYCHIATRIE SEANCE IND, 1 INTERVENANT 3 à 4H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2382;5;PY2;FORFAIT PSYCHIATRIE SEANCE COLL, 2 INTERVENANTS 3 à 4H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2383;5;PY3;FORFAIT PSYCHIATRIE SEANCE IND. 2 INTERVENANTS 3 à 4H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2384;5;PY4;FORFAIT PSYCHIATRIE SEANCE COLL, 1 INTERVENANT 6 à 8H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2385;5;PY5;FORFAIT PSYCHIATRIE SEANCE IND. 1 INTERVENANT 6 à 8H;0;0;10;0;;0;;0;Z;22;0;0;;;0 +1146;99;R4P;REMUNERATION OBJECTIF MEDECIN COMPLEMENT ET CENTRES DE SANTE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1147;99;CFM;CONTESTATION FORFAIT MEDECIN TRAITANT;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1148;99;MPA;REMUNERATION FORFAITAIRE PAR CONSULTATION POUR LE SUIVI DES PERSONNES AGEES;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1149;99;CPA;CONTESTATION REMUNERATION SUIVI PERSONNES AGEES;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1150;99;RPT;REMUNERATION PRATICIENS TERRITORIAUX DE MEDECINE GENERALE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1152;99;MPP;MAJORATION SUIVI DES ENFANTS GRANDS PREMATURES OU ATTEINTS DE PATHOLOGIE CONGENITALE GRAVE;1;9;99;1;22;0;;1;C;20;0;1;1;fermé le 31/12/2017;0 +1153;99;MIC;MAJORATION CONSULTATION POUR INSUFFISANT CARDIAQUE APRES HOSPITALISATION;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1154;99;MSH;MAJORATION CONSULTATION SUIVI APRES HOSPITALISATION PATIENTS A FORTE COMORBIDITE;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1155;99;COT;REMU CAS COTISATIONS SOCIALES;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1156;99;CCO;CONT. 
CAS COT.SOC.;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1157;99;TCP;Acte de téléconsultation;1;9;99;0;;1;;1;Z;20;0;1;;;0 +1158;99;TEP;Acte de télé expertise;1;9;99;0;;1;;1;Z;20;0;1;;;0 +1159;99;RNO;renouvellement d'optique;1;9;99;0;;1;;1;C;20;0;1;0;;0 +1160;99;IAS;INVESTISSEMENT ACTIVITE SAISONNIERE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1161;99;CPS;COMPLEMENT PRATIQUE SAISONNIERE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1162;99;RCM;REMUNERATION DES PRATICIENS TERRITORIAUX DE MEDECINE AMBULATOIRE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1163;99;KIT;REMUNERATION DEPISTAGE DU CANCER COLORECTAL;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1164;99;TLC;TÉLÉ CONSULTATION - ALD ET / OU EHPAD;1;9;99;0;;1;;1;C;20;0;1;;;0 +1165;99;TLE;TÉLÉ EXPERTISE - ALD ET/OU EHPAD;1;9;99;0;;0;;1;Z;20;0;1;;;0 +1166;99;TEC;FORFAIT COMPLÉMENTAIRE TÉLÉ EXPERTISE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1167;99;NRD;VERSEMENT DE PENALITE DE RETARD AMO;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1168;99;CCP;CONSULTATION DE CONTRACEPTION ET PREVENTION;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1169;99;RCD;REMUNERATION POUR CERTIFICAT DE DECES;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1170;99;DHE;PEC EXCEPTIONNELLE DÉPASSEMENT HONORAIRE;1;9;99;0;;0;;1;Z;20;0;1;;;0 +1171;99;CSR;RÉMUNÉRATION MÉDECIN TRAITANT CENTRES DE SANTÉ;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1172;99;TSA;TELESURVEILLANCE : PS EFFECTUANT L'ACCOMPAGNEMENT;1;9;99;0;;0;;1;C;20;0;1;;;0 +1173;99;MTF;FORFAIT PATIENTELE MEDECIN TRAITANT;1;9;99;0;;0;;1;Z;21;0;47;;;0 +0;0;;SANS OBJET;0;0;;0;;0;;0;Z;;0;0;;;2 +1095;99;MTJ;MAJORATION SPECIFIQUE MOINS DE 16 ANS MAYOTTE;1;9;99;1;23;0;;1;Z;20;0;1;1;;0 +1096;99;TTE;TELECONSULTATION MEDECIN TRAITANT AVEC EHPAD;1;9;99;0;;0;;1;C;20;0;1;;;0 +1097;99;TDT;TELE EXPERTISE DOSSIER TRAITANT;1;9;99;0;;0;;1;C;20;0;1;;;0 +1098;99;U03;CONSULTATION CCMU 3;1;9;99;0;;0;;1;C;20;0;1;1;;0 +1099;99;U45;CONSULTATION CCMU 4 ET 5;1;9;99;0;;0;;1;C;20;0;1;1;;0 +1100;99;RNM;PROTOCOLE MURAINE - BILAN VISUEL;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1101;99;APU;AVIS PONCTUEL DE CONSULTANT PUPH;1;9;99;0;;1;1;1;C;20;0;1;1;;0 +1102;99;APY;AVIS PONCTUEL DE 
CONSULTANT PSYCHIATRE;1;9;99;0;;1;1;1;C;20;0;1;1;;0 +1103;99;APC;AVIS PONCTUEL DE CONSULTANT DU MEDECIN;1;9;99;0;;0;1;1;C;20;0;1;1;;0 +1104;99;COE;CONSULTATION OBLIGATOIRE ENFANT;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1105;99;CCX;CONSULTATION COMPLEXE;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1106;99;MCX;MAJORATION CONSULTATION COMPLEXE;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1107;99;CCE;CONSULTATION TRES COMPLEXE ENFANT;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1108;99;MTX;MAJORATION CONSULTATION TRES COMPLEXE;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1109;99;GS;CONSULTATION SPECIALISTE MEDECINE GENERALE;1;9;99;0;;1;1;1;C;20;0;1;1;;0 +1110;99;G;CONSULTATION MEDECINE GENERALE;1;9;99;0;;1;1;1;C;20;0;1;1;;0 +1111;1;C;CONSULTATION COTEE C;1;1;;0;;1;1;1;C;20;0;1;1;;0 +1112;1;CS;CONSULTATION COTEE CS;1;1;;0;;1;1;1;C;20;0;1;1;;0 +1113;1;CNP;CONSULTATION COTEE CNP;1;1;;0;;1;1;1;C;20;0;1;1;;0 +1114;1;CSC;CONSULTATION SPECIFIQUE CARDIOLOGIE;1;0;1;0;;1;1;1;C;20;0;1;1;;0 +1115;1;CA;CONSULTATION BILAN;1;0;1;0;;1;;1;C;20;0;1;1;fermé le 01/07/2017;0 +1116;1;MPC;MAJORATION FORFAITAIRE TRANSITOIRE;1;1;;1;6;0;1;1;Z;20;0;1;1;;0 +1117;1;;CONSULTATION DES SPECIALISTES COTEE C2;1;1;;0;;1;1;1;C;20;0;1;1;;0 +1118;1;;CONSULTATION DES PSYCHIATRES COTEE C2,5;1;1;;0;;1;1;1;C;20;0;1;1;;0 +1119;1;MTS;MAJORATION TRANSITOIRE SPECIFIQUE;1;0;1;1;23;0;;1;Z;20;0;1;1;;0 +1120;1;CCS;COMPLEMENT CONSULTATION SPECIALISTE;1;0;1;1;26;0;1;1;C;20;0;1;;;0 +1121;1;HS;HONORAIRE DE SURVEILLANCE;1;0;1;0;;0;;1;C;20;0;1;;;0 +1122;1;EXS;EXAMEN SPECIAL (PROTOCOLE);1;0;1;0;;1;;1;C;20;0;16;1;;0 +1123;1;SES;SUITE D EXAMEN DE SANTE;1;0;1;0;;1;;1;C;20;0;1;1;;0 +1124;1;RMT;REMUNERATION MEDECIN TRAITANT PAR PATIENT EN ALD;1;0;1;0;;0;;1;Z;20;0;47;;fermé le 31/12/2017;0 +1125;1;MCG;MAJORATION DE COORDINATION DES GENERALISTES;1;1;;1;18;0;1;1;Z;20;0;1;1;;0 +1126;1;MCS;MAJORATION DE COORDINATION SPECIALISTES;1;1;;1;18;0;1;1;Z;20;0;1;1;;0 +1127;1;MCC;MAJORATION DE COORDINATION CARDIOLOGUES;1;1;;1;18;0;;1;Z;20;0;1;1;;0 +1128;1;DRT;DIFFERENTIEL REFERENT 
TRAITANT;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1129;1;MPJ;MAJORATION FORFAITAIRE TRANSITOIRE (POUR LES MOINS DE 16 ANS);1;1;;1;6;0;1;1;Z;20;0;1;1;fermé le 31/12/2017;0 +1130;99;FMT;FORFAIT MEDECIN TRAITANT;1;9;99;0;;0;;1;Z;20;0;47;;fermé le 31/12/2017;0 +1131;1;MTA;MAJORATION CONSULTATION APPAREILLAGE;1;0;1;1;22;0;;1;C;20;0;1;1;;0 +1132;1;MCE;MAJORATION CONSULTATION ENDOCRINO;1;0;1;1;22;0;;1;C;20;0;1;1;;0 +1133;1;MGE;MAJORATION GENERALISTE ENFANT;1;0;1;1;5;0;;1;C;20;0;1;1;fermé le 31/12/2017;0 +1134;1;MPF;MAJORATION PREMIERE CONSULTATION FAMILLE;1;0;1;1;22;0;;1;C;20;0;1;1;;0 +1135;1;MAF;MAJORATION CONSULTATION ANNUELLE FAMILLE;1;0;1;1;22;0;;1;C;20;0;1;1;;0 +1136;1;MAS;MAJORATION ANNUELLE DE SYNTHESE;1;0;1;1;5;0;;1;C;20;0;1;1;fermé le 31/12/2017;0 +1137;1;MBB;MAJORATION NOURRISSON;1;0;1;1;5;0;;1;C;20;0;1;1;fermé le 31/12/2017;0 +1138;1;RAA;REMUNERATION ADDITIONNELLE CAPI;1;0;1;1;28;0;;1;Z;20;0;47;;fermé le 30/06/2017;0 +1139;1;RAC;REMUNERATION DES ADHERENTS AU CAPI;1;0;1;0;;0;;1;Z;20;0;47;;fermé le 30/06/2012;0 +1140;1;CDE;CONSULTATION SPECIFIQUE DE DEPISTAGE;1;0;1;0;;1;;1;C;20;0;1;1;;0 +1141;1;MPE;MAJORATION PEDIATRE ENFANT;1;0;1;1;5;0;;1;C;20;0;1;1;fermé le 31/12/2017;0 +1142;99;RSO;REMUNERATION ADHESION SOPHIA (SOINS DE VILLE);1;9;99;0;;0;;1;Z;20;0;47;;;0 +1143;99;RSR;REMUNERATION RENOUVELLEMENT SOPHIA (SOINS DE VILLE);1;9;99;0;;0;;1;Z;20;0;47;;;0 +1144;99;RST;REMUNERATION FORFAITAIRE POUR LE SUIVI DES PATIENTS EN POST ALD;1;9;99;0;;0;;1;Z;20;0;47;;fermé le 31/12/2017;0 +1145;99;P4P;REMUNERATION OBJECTIF MEDECIN ET CENTRES DE SANTE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1174;99;TSM;TELESURVEILLANCE : MEDECIN TELESURVEILLANT;1;9;99;0;;0;;1;C;20;0;1;;;0 +1175;99;DHT;PEC EXCEPTIONNELLE DEPASSEMENT HONORAIRE TP;1;9;99;0;;0;;1;Z;20;0;1;;;0 +1176;99;COI;CONTRAT INDIVIDUEL EMBAUCHE - AVANCE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1177;99;AFC;AIDE FINANCIERE MATERNITE PATERNITE ADOPTION;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1178;99;P6P;FORFAIT STRUCT. 
MEDECIN;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1179;99;R6P;FORFAIT STRUCT. MEDECIN COMPLEMENT;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1180;99;P5P;ROSP MT ENFANT;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1181;99;R5P;ROSP MT ENFANT COMPLEMENT;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1182;99;PTM;REMUNERATION OPTAM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1183;99;CTM;CONTESTATION OPTAM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1184;99;COS;CONTRAT INDIVIDUEL EMBAUCHE - SOLDE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1185;99;RNB;MURAINE - PAIEMENT DU BONUS ANNUEL;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1186;99;COC;CONTRAT INDIVIDUEL EMBAUCHE OBJECTIFS COMPLEMENTAIRES;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1187;99;CCI;CONTRAT COLLECTIF AIDE INVESTISSEMENT;1;9;99;0;;0;;0;Z;20;0;47;;;0 +1188;99;CCA;CONTRAT COLLECTIF ATTEINTE DES OBJECTIFS;1;9;99;0;;0;;0;Z;20;0;47;;;0 +1189;99;COF;CONTRAT DE FORMATION;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1190;99;TSP;TELESURVEILLANCE : PRIME VARIABLE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1191;99;TC;TELECONSULTATION TOUTES SPECIALITES;1;9;99;0;;1;1;1;C;20;0;1;;;0 +1192;99;TCG;TELECONSULTATION GENERALISTE;1;9;99;0;;1;1;1;C;20;0;1;;;0 +1193;99;TE1;TELE EXPERTISE DE NIVEAU 1;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1194;99;TE2;TELE EXPERTISE DE NIVEAU 2;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1209;99;VGS;VISITE SPECIALISTE MEDECINE GENERALE;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1210;99;VG;VISITE MEDECINE GENERALE;1;9;99;0;;1;;1;C;20;0;1;1;;0 +1211;1;V;VISITE COTEE V;1;0;1;0;;1;;1;C;20;0;1;1;;0 +1212;1;VS;VISITE COTEE VS;1;0;1;0;;1;;1;C;20;0;1;1;;0 +1213;1;VNP;VISITE COTEE VNP;1;0;1;0;;1;;1;C;20;0;1;1;;0 +1214;1;VL;VISITE LONGUE ET COMPLEXE;1;0;1;0;;1;;1;C;20;0;1;1;;0 +1215;99;APV;AVIS PONCTUEL DE CONSULTANT MEDECIN (VISITE);1;9;99;0;;1;;1;C;20;0;1;1;;0 +1216;99;AVY;AVIS PONCTUEL DE CONSULTANT PSYCHIATRE (VISITE);1;9;99;0;;1;;1;C;20;0;1;1;;0 +1221;1;VA;VISITE D URGENCE;1;0;1;0;;1;;1;C;20;0;1;1;fermé le 15/09/2014;0 +1222;1;VU;VISITE URGENCE VU/MU;1;0;1;0;;1;;1;C;20;0;1;1;;0 +1223;1;MMD;MAJORATION MAINTIEN A DOMICILE;1;0;1;1;4;0;;1;Z;;0;1;;fermé le 30/09/2002;0 +1224;1;MD;MD (CRITERES 
MEDICAUX);1;0;1;1;4;0;1;1;Z;20;0;1;1;;0 +1225;1;MDE;MDE ( CRITERES ENVIRONNEMENTAUX);1;0;1;1;4;0;;1;Z;20;0;1;1;fermé le 10/02/2007;0 +1226;1;MDN;MD DE NUIT;1;0;1;1;1;0;1;1;Z;20;0;1;1;;0 +1227;1;MDI;MD DE MILIEU NUIT;1;0;1;1;1;0;1;1;Z;20;0;1;1;;0 +1228;1;MDD;MD DE DIMANCHE ET JOUR FERIES;1;0;1;1;2;0;1;1;Z;20;0;1;1;;0 +1229;1;MEN;MDE DE NUIT;1;0;1;1;1;0;;1;Z;;0;1;;fermé le 31/03/2003;0 +1231;1;MEI;MDE MILIEU DE NUIT;1;0;1;1;1;0;;1;Z;;0;1;;fermé le 31/03/2003;0 +1232;1;MED;MDE DIMANCHE ET JOUR FERIES;1;0;1;1;2;0;;1;Z;;0;1;;fermé le 31/03/2003;0 +1311;1;KC;ACTES EN K CHIRURGICAL;1;0;1;0;;1;1;1;T;20;0;37;1;fermé le 01/12/2014;0 +1312;1;K;ACTES DE SPECIALITE EN K;1;1;;0;;1;1;1;T;20;0;1;1;;0 +1313;1;KA;ACTES EN K D URGENCE;1;0;1;0;;1;;1;T;20;0;1;1;fermé le 01/01/2011;0 +1314;1;KFA;FORFAIT CHIRURGIE 1;1;0;1;1;8;0;;1;Z;20;0;1;;fermé le 31/12/2006;0 +1315;1;KFB;FORFAIT CHIRURGIE 2;1;0;1;1;8;0;;1;Z;20;0;1;;fermé le 31/12/2006;0 +1316;1;KE;ACTES DE DIAGNOSTIC COTES KE;1;1;;0;;1;1;1;T;20;0;1;1;;0 +1317;1;KCC;ACTES EN KCC: ACTES SPECIFIQUES DES CHIRURGIENS;1;0;1;0;;0;;1;T;20;0;1;1;fermé le 31/12/2006;0 +1318;1;KMO;ACTE DE PHONIATRIE PAR MEDECIN;1;0;1;0;;1;;1;T;20;0;1;1;;0 +1319;1;KFC;MAJORATION FORFAIT ACCOUCHEMENT;1;0;1;1;7;0;;1;Z;20;0;1;;fermé le 31/12/2006;0 +1320;1;KFD;FORFAIT RADIOGRAPHIE,ECHOGRAPHIE;1;0;1;1;15;0;;1;T;20;0;1;;fermé le 31/12/2006;0 +1321;99;ADC;ACTE DE CHIRURGIE CCAM;1;9;99;0;;1;1;1;T;20;1;44;1;;0 +1322;1;ACO;ACTE D'OBSTETRIQUE CCAM;1;1;;0;;1;1;1;T;20;0;25;1;;0 +1323;1;ADA;ACTE D'ANESTHESIE CCAM;1;1;;0;;1;1;1;T;20;0;25;1;;0 +1324;1;ADE;ACTE D'ECHOGRAPHIE CCAM;1;1;;0;;1;1;1;T;20;0;25;1;;0 +1331;1;Z;ACTES DE RADIOLOGIE;1;1;;0;;1;1;1;T;20;0;1;1;;0 +1332;1;ZN;ACTES DE RADIOLOGIE NUCLEAIRE;1;0;1;0;;0;;1;T;20;0;1;;fermé le 31/12/2006;0 +1333;1;PRA;MAJORATION POUR PRODUIT RADIOPHARMACEUTIQUE;1;0;1;1;9;0;;1;T;20;0;1;;fermé le 31/12/2006;0 +1334;1;DCS;DEPISTAGE CANCER DU SEIN;1;0;1;0;;0;;1;Z;;0;1;;fermé le 01/01/1999;0 +1335;1;ZM/ADI;ACTE DE RADIOLOGIE 
MAMMOGRAPHIE;1;0;1;0;;1;;1;T;;0;1;1;ZM : fermé le 31/12/2006;0 +1336;1;ZM DEPISTAGE/ADI;ACTE DE RADIOLOGIE MAMMOGRAPHIE DEPISTAGE;1;0;1;0;;1;;1;T;;0;1;1;ZM : fermé le 31/12/2006;0 +1341;99;P;ACTES D ANATOMO-CYTO-PATHOLOGIE/MEDECINS;1;9;99;0;;1;;1;T;20;0;1;1;fermé le 01/03/2011;0 +1342;99;MAP;MAJORATION ANATOMO-CYTO-PATHOLOGIE;1;9;99;1;19;0;;1;Z;20;0;1;1;fermé le 01/03/2011;0 +1345;1;MTC;MAJORATION TRANSITOIRE;1;0;1;1;14;0;;1;Z;20;0;1;;fermé le 31/12/2006;0 +1351;99;ADI;ACTE D'IMAGERIE (hors ECHOGRAPHIE) CCAM;1;9;99;0;;1;1;1;T;20;1;25;1;;0 +1352;99;ATM;ACTES TECHNIQUES MEDICAUX (hors IMAGERIE) CCAM;1;9;99;0;;1;1;1;T;20;1;25;1;;0 +1361;1;VDC;VIDEOCAPSULE;1;0;1;0;;1;;1;T;20;0;49;;;0 +1400;99;BR1;BRIDGE RAC MODERE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1401;99;PF0;PROTHESE FIXE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1402;99;PF1;PROTHESE FIXE RAC MODERE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1403;99;RF0;REPARATION FACETTE PROTHESE AMOVIBLE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1411;1;SCM/SPA;ACTES EN SCM (ET SPA POUR LA CRPCEN);1;0;1;0;;1;1;1;T;;1;34;1;SCM : fermé le 01/12/2014;0 +1415;99;CM0;PROTHESE FIXE METALLIQUE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1416;99;CT0;COURONNE TRANSITOIRE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1417;99;CZ0;COURONNE ZIRCONE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1418;99;IC0;INLAY CORE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1419;99;PA0;PROTHESE AMOVIBLE RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1420;99;PT0;PROTHESE AMOVIBLE DE TRANSITION RAC 0;1;9;99;0;;1;;1;T;20;1;42;;;0 +1421;1;PRO;ACTES DE PROTHESE DENTAIRE PRATIQUES PAR LE MEDECIN;1;0;1;0;;1;1;1;T;20;1;35;;fermé le 01/12/2014;0 +1422;1;ORT/EOS;TRAITEMENTS D ORTHODONTIE PRATIQUES PAR LE MEDECIN (ET EOS POUR LA CRPCEN);1;0;1;0;;1;;1;T;20;1;36;;;0 +1423;1;SPR;ACTES DE PROTHESE DENTAIRE PRATIQUES PAR LE CHIRURGIEN-DENTISTE;1;0;2;0;;1;1;1;Z;20;0;35;;fermé le 01/12/2014;0 +1424;1;TO/ETO;TRAITEMENTS D ORTHODONTIE PRATIQUES PAR LE CHIRURGIEN-DENTISTE (ET ETO POUR LA CRPCEN);1;0;2;0;;1;;1;Z;;0;36;;;0 +1425;1;ATD;COMPLEMENT AT 150% 
DENTAIRE;1;0;99;1;27;0;;1;Z;20;0;35;;;0 +1426;99;DDE;PEC EXCEPTIONNELLE DÉPASSEMENT DENTAIRE;1;9;99;0;;0;;1;Z;20;0;41;;;0 +1427;99;DDT;PEC EXCEPTIONNELLE DEPASSEMENT DENTAIRE TP;1;9;99;0;;0;;1;Z;20;0;41;;;0 +1650;99;CAA;AIDE A L'ACTIVITE COTRAM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1651;99;MRA;PAIEMENT MAJORATION REMUNERATION ARS COTRAM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1652;99;RFC;PTMR - REMPLACEMENT;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1653;99;RCR;PTMR - MALADIE, MATERNITE, PATERNITE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1701;1;CMD;CDS MEDICAL OPTANT FORF DEBUT;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1702;1;CMF;CDS MEDICAL OPTANT FORF FIN;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1703;1;CMT;CDS MEDICAL OPTANT FORFTACITE;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1704;1;CDI;CDS DENTAIRE OPTANT FORF INITIAL;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1705;1;CDS;CDS DENTAIRE OPTANT FORF SUIVI;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1706;1;CDF;CDS DENTAIRE OPTANT FORF FINAL;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1707;1;CID;CDS INFIRMIER OPTANT FORF DEBUT;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1708;1;CIF;CDS INFIRMIER OPTANT FORF FIN;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1711;1;MRD;FORFAIT MEDECIN REFERENT DEBUT DE CONTRAT;1;0;1;0;;0;;1;Z;20;0;46;;fermé le 28/02/2007;0 +1712;1;MRF;FORFAIT MEDECIN REFERENT FIN DE CONTRAT;1;0;1;0;;0;;1;Z;20;0;46;;fermé le 28/02/2007;0 +1713;1;MRI;FORFAIT MEDECIN REFERENT INFORMATISE;1;0;1;0;;0;;1;Z;;0;46;;fermé le 20/10/1997;0 +1715;1;FMC;FORFAIT MENSUEL COORDONNATEUR;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1716;1;FMP;FORFAIT MENSUEL PARTICIPATION;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1717;1;FMS;FORFAIT MENS UEL SOINS;1;0;1;0;;0;;1;C;20;0;46;;;0 +1718;1;FAZ;FORFAIT D'ADHESION ZONE DEFICITAIRE;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1721;1;F01;FORFAIT PROFESSIONNEL (F01) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1722;1;F02;FORFAIT PROFESSIONNEL (F02) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1723;1;F03;FORFAIT PROFESSIONNEL (F03) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1724;1;F04;FORFAIT PROFESSIONNEL (F04) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1725;1;F05;FORFAIT 
PROFESSIONNEL (F05) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1726;1;F06;FORFAIT PROFESSIONNEL (F06) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1727;1;F07;FORFAIT PROFESSIONNEL (F07) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1728;1;F08;FORFAIT PROFESSIONNEL (F08) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1729;1;F09;FORFAIT PROFESSIONNEL (F09) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1731;1;F10;FORFAIT PROFESSIONNEL (F10) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1732;1;F11;FORFAIT PROFESSIONNEL (F11) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1733;1;F12;FORFAIT PROFESSIONNEL (F12) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1734;1;F13;FORFAIT PROFESSIONNEL (F13) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1735;1;F14;FORFAIT PROFESSIONNEL (F14) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1736;1;F15;FORFAIT PROFESSIONNEL (F15) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1741;1;FC0;FORFAIT CONSULTATION (FC0) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1742;1;FC1;FORFAIT CONSULTATION (FC1) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1743;1;FC2;FORFAIT CONSULTATION (FC2) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1744;1;FC3;FORFAIT CONSULTATION (FC3) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1745;1;FC4;FORFAIT CONSULTATION (FC4) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1746;1;FC5;FORFAIT CONSULTATION (FC5) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1747;1;FC6;FORFAIT CONSULTATION (FC6) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1748;1;FC7;FORFAIT CONSULTATION (FC7) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1749;1;FC8;FORFAIT CONSULTATION (FC8) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1751;1;FC9;FORFAIT CONSULTATION (FC9) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1761;1;FF0;FORFAIT FORMATION (FF0) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1762;1;FF1;FORFAIT FORMATION (FF1) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1763;1;FF2;FORFAIT FORMATION (FF2) 
FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1764;1;FF3;FORFAIT FORMATION (FF3) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1765;1;FF4;FORFAIT FORMATION (FF4) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1766;1;FF5;FORFAIT FORMATION (FF5) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1767;1;FF6;FORFAIT FORMATION (FF6) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1768;1;FF7;FORFAIT FORMATION (FF7) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1769;1;FF8;FORFAIT FORMATION (FF8) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1771;1;FF9;FORFAIT FORMATION (FF9) FILIERES ET RESEAUX;1;2;;0;;0;;1;Z;20;0;47;;;0 +1781;1;FP0;FORFAIT PREVENTION/DEPISTAGE (FP0) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1782;1;FP1;FORFAIT PREVENTION/DEPISTAGE (FP1) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1783;1;FP2;FORFAIT PREVENTION/DEPISTAGE (FP2) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1784;1;FP3;FORFAIT PREVENTION/DEPISTAGE (FP3) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1785;1;FP4;FORFAIT PREVENTION/DEPISTAGE (FP4) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1786;1;FP5;FORFAIT PREVENTION/DEPISTAGE (FP5) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1787;1;FP6;FORFAIT PREVENTION/DEPISTAGE (FP6) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1788;1;FP7;FORFAIT PREVENTION/DEPISTAGE (FP7) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1789;1;FP8;FORFAIT PREVENTION/DEPISTAGE (FP8) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1791;1;FP9;FORFAIT PREVENTION/DEPISTAGE (FP9) FILIERES ET RESEAUX;1;2;;0;;0;;1;C;20;0;46;;;0 +1811;1;IK IKP;IK PLAINE;1;2;;1;10;0;1;1;Z;;0;55;;;0 +1812;1;IKM;IK MONTAGNE;1;2;;1;10;0;1;1;Z;20;0;55;;;0 +1813;1;IKS;IK PIED SKI;1;2;;1;10;0;1;1;Z;20;0;55;;;0 +1814;1;IKG;FRAIS DE DEPLACEMENT VACATION;1;2;;1;10;0;;1;Z;20;0;55;;;0 +1821;1;ID;ID PARIS LYON MARSEILLE, +100.000 HA, -100.000 HA;1;2;;1;10;0;1;1;Z;20;0;7;;;0 +1841;1;IF;INDEMNITES FORFAITAIRES DE DEPLACEMENT;1;2;;1;10;0;1;1;Z;20;0;7;;;0 +1842;2;IFA;INDEMNITES 
FORFAITAIRES DE DEPLACEMENT DES AUXILIAIRES MEDICAUX ET ASSIMILES;1;0;3;1;10;0;1;1;Z;20;0;7;;;0 +1843;2;IFO;INDEMNITES FORFAITAIRES DE DEPLACEMENT MK ORTHOPEDIQUE ET RHUMATOLOGIQUE;1;0;3;1;10;0;1;1;Z;20;0;7;;;0 +1844;2;IFR;INDEMNITES FORFAITAIRES DE DEPLACEMENT MK RHUMATISMALE;1;0;3;1;10;0;1;1;Z;20;0;7;;;0 +1845;2;IFN;INDEMNITES FORFAITAIRES DE DEPLACEMENT MK NEUROLOGIQUE;1;0;3;1;10;0;1;1;Z;20;0;7;;;0 +1846;2;IFP;INDEMNITES FORFAITAIRES DE DEPLACEMENT MK PNEUMOLOGIE;1;0;3;1;10;0;1;1;Z;20;0;7;;;0 +1847;2;IFS;INDEMNITES FORFAITAIRES DE DEPLACEMENT DE SORTIE;1;0;3;1;10;0;1;1;Z;20;0;7;;;0 +1903;99;MEG;MAJORATION ENFANT GENERALISTE;1;9;99;1;5;0;;1;C;20;0;1;;;0 +1904;99;MEP;MAJORATION ENFANT PEDIATRE;1;9;99;1;5;0;;1;C;20;0;1;1;;0 +1905;99;NFE;NOUVEAU FORFAIT ENFANT;1;9;99;1;5;0;;1;C;20;0;1;1;;0 +2226;5;SPB;SUPPLEMENT POUR CHAMBRE PLOMBEE;0;0;10;1;13;0;;0;Z;;0;0;;fermé le 30/04/2003;0 +2227;5;SAP;SUPPLEMENT POUR ALIMENTATION PARENTERALE;0;0;10;1;13;0;;0;Z;;0;0;;fermé le 30/04/2003;0 +2229;5;FNN;FORFAIT PRISE EN CHARGE DU NOUVEAU NE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2230;99;PJL;PRIX DE JOURNEE REGIME LOCAL;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2231;5;PHJ;FORFAIT PHARMACEUTIQUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2232;5;FTO;FORFAIT TRANSPLANTATION D ORGANE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2234;5;ENT;FORFAIT D ENTREE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2235;5;ANP;FORFAIT D ACTIVITE NON PROGRAMMEE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2236;5;FCO;FORFAIT CONSOMMABLE CARDIOLOGIE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2237;5;PJC;PART COMPLEMENTAIRE AIDE MEDICALE ETAT (REGULARISATION CMU COMPLEMENTAIRE);0;0;10;0;;0;;0;Z;22;0;0;;;0 +2238;5;ATU;FORFAIT D ACCUEIL ET DE TRAITEMENT DES URGENCES;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2239;5;FAU;FORFAIT ANNUEL D URGENCE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2240;5;TJC;TARIF JOURNALIER COMPLEMENTAIRE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2241;5;FSO;FRAIS DE SALLE D OPERATION;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2242;5;ARE;FRAIS D ANESTHESIE ET REANIMATION;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2243;5;FE;FRAIS D 
ENVIRONNEMENT;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2244;99;I02;Forfait Innovation ARGUS II;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2245;5;FST;FRAIS DE SALLE D ACCOUCHEMENT;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2246;5;FSG;FRAIS DE SALLE D ACCOUCHEMENT MULTIPLE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2247;5;FSY;FORFAIT PSYCHIATRIE DE SECURITE - HOSPITALISATION AVEC HEBERGEMENT;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2248;99;I01;Forfait Innovation HIFU;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2249;99;FAI;FORFAIT ACTIVITE ISOLE;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2250;5;FJC;FORFAIT JOURNALIER AIDE MEDICALE (REGULARISATION CMU COMPLEMENTAIRE);0;0;10;0;;0;;0;Z;22;0;0;;;0 +2251;5;FJ;FORFAIT JOURNALIER;0;1;10;1;12;0;;0;Z;22;0;0;;;0 +2252;5;FJA;FORFAIT JOURNALIER DE SORTIE;0;1;10;0;;0;;0;Z;22;0;0;;;0 +2257;5;FJO;FJ TRANSPLANTATION ORGANES;0;1;10;0;;0;;0;Z;22;0;0;;;0 +2258;5;FSJ;FORFAIT DE SOINS JOURNALIER;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2259;5;;SAISIE MANUELLE DES SEJOURS POUR LEQUELS LE FJ EST SUPERIEUR AU TM;0;0;10;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +2260;99;HPC;FORFAIT HOPITAL PROXIMITE COMPLEMENTAIRE;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2261;5;FA1;FORFAIT ACCUEIL DU PATIENT N 1;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2262;5;FA2;FORFAIT ACCUEIL DU PATIENT N 2;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2263;99;IFQ;FORFAIT IFAQ;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2264;99;IFZ;FORFAIT IFAQ SSR;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2265;99;DMA;DOTATION MODULEE A L'ACTIVITE;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2271;5;AS1;FORFAIT HPT GROUPE 1;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2272;5;AS2;FORFAIT HPT GROUPE 1.2;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2273;5;AS3;FORFAIT HPT 15 %;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2274;5;AS4;FORFAIT HPT GROUPE 2 + FAS1;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2275;5;AS5;FORFAIT HPT GROUPE 2 + FAS2;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2282;5;PAT;PARTICIPATION ASSURE TRANSITOIRE;1;0;10;1;16;0;;0;Z;;0;0;;fermé le 30/09/2007;0 +2283;5;PAH;PARTICIPATION ASSURE HOSPITALISATION PUBLIQUE (CMU + AME);0;0;10;1;16;0;;0;Z;22;0;0;;;0 +2284;5;PAJ;PARTICIPATION ASSURE HOSPITALISATION PUBLIQUE (REGIME 
LOCAL);0;0;10;1;16;0;;0;Z;22;0;0;;;0 +2285;5;PAS;PARTICIPATION ASSURE SUR SEJOUR;0;0;10;1;16;0;;0;Z;22;0;0;;;0 +2311;5;FNO;FORFAIT DE SEANCE DE SOINS SANS OXYGENE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 31/12/1998;0 +2312;5;FOC;FORFAIT POUR INSUFFISANCE RESPIRATOIRE AVEC OXYGENE EXTRACTEUR;0;0;10;0;;0;;0;Z;;0;0;;fermé le 31/12/1998;0 +2313;5;FOB;FORFAIT POUR INSUFFISANCE RESPIRATOIRE AVEC OXYGENE BOUTEILLE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 31/12/1998;0 +2314;5;FOL;FORFAIT POUR INSUFFISANCE RESPIRATOIRE AVEC OXYGENE LIQUIDE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 31/12/1998;0 +2315;5;PPC;APPAREILLAGE VENTILATION PRESSION POSITIVE CONTINUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2321;5;FPA/FS;FORFAIT - LONG SEJOUR PERSONNES AGEES;0;0;10;0;;0;;0;Z;;0;0;;;0 +2331;5;SNS;RADIUMTHERAPIE ET CHIMIOTHERAPIE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2332;5;RF/SNS;READAPTATION FONCTIONNELLE;0;0;10;0;;0;;0;Z;;0;0;;;0 +2333;5;RP/FS;REEDUCATION PROFESSIONNELLE;0;0;10;0;;0;;0;Z;;0;0;;;0 +1974;1;FRT;FRANCHISE TIERS PAYANT SUR TRANSPORT;1;1;;1;16;0;;1;Z;20;0;52;;;0 +1975;1;FRH;FRANCHISE HORS TIERS PAYANT ACTE D'AUXILIAIRE MEDICAUX;1;1;;1;16;0;;1;Z;20;0;26;;;0 +1976;1;FRT;FRANCHISE TIERS PAYANT ACTE D'AUXILIAIRE MEDICAUX;1;1;;1;16;0;;1;Z;20;0;26;;;0 +1977;1;FRH;PARTICIPATION ASSURE HORS TIERS PAYANT TRANSMISE SANS ACTE DE REFERENCE OU TYPE DE FRANCHISES;1;2;;1;16;0;;1;Z;20;0;26;;;0 +1978;1;FRT;PARTICIPATION ASSURE EN TIERS PAYANT TRANSMISE SANS ACTE DE REFERENCE OU TYPE DE FRANCHISES;1;2;;1;16;0;;1;Z;20;0;26;;;0 +1981;1;FHV;FORFAIT IVG HONORAIRES DE VILLE;1;0;1;0;;1;;1;C;20;0;1;1;;0 +1990;99;FIM;FORFAIT D'INTERVENTION PAR SORTIE SUR DEMANDE DE LA REGULATION;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1991;1;REG;REMUNERATION REGULATION;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1992;1;PRN;PERMANENCE REMUNERATION DE NUIT;1;0;1;0;;0;;1;Z;20;0;31;;;0 +1993;1;PRM;PERMANENCE REMUNERATION MILIEU DE NUIT;1;0;1;0;;0;;1;Z;20;0;31;;;0 +1994;1;PRD;PERMANENCE REMUNERATION DIMANCHE ET FERIE;1;0;1;0;;0;;1;Z;20;0;31;;;0 +1995;1;PRT;PERMANENCE REMUNERATION 
TOTAL;1;0;1;0;;0;;1;Z;20;0;31;;;0 +1996;1;RSA;PERMANENCE REMUNERATION SAMEDI MATIN;1;0;1;0;;0;;1;Z;20;0;31;;;0 +1997;1;RSP;PERMANENCE REMUNERATION SAMEDI APRES MIDI;1;0;1;0;;0;;1;Z;20;0;31;;;0 +1998;99;AJS;ASTREINTE DE JOUR CORRESPONDANT SAMU;1;9;99;0;;0;;1;Z;20;0;31;;;0 +1999;99;ANS;ASTREINTE DE NUIT CORRESPONDANT SAMU;1;9;99;0;;0;;1;Z;20;0;31;;;0 +2106;99;TDD;TRANSPORT DEFINITIF DIALYSE;0;9;10;1;13;0;;0;Z;22;0;10;;;0 +2107;99;TSD;TRANSPORT SEANCE DIALYSE;0;9;10;1;13;0;;0;Z;22;0;10;;;0 +2108;99;TDE;SUPPLEMENT TRANSPORT 2;0;9;10;0;;0;;0;Z;22;0;10;;;0 +2109;99;TSE;SUPPLEMENT TRANSPORT SEANCES;0;9;10;0;;0;;0;Z;22;0;10;;;0 +2111;5;GHS;FRAIS D HEBERGEMENT ET ENVIRONNEMENT EN GHS;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2112;5;EXH;FRAIS DE SEJOUR SUPPLEMENTAIRE AU GHS;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2113;5;GHT;GROUPE HOMOGENE DE TARIFS;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2114;99;SRA;SUPPLEMENT SOINS PARTICULIEREMENT COUTEUX (ARRETE DU 29/06/1978);0;9;99;0;;0;;0;Z;22;0;0;;fermé le28/02/2009;0 +2115;99;SSC;SUPPLEMENT SOINS CONTINUS;0;9;99;0;;0;;0;Z;22;0;0;;fermé le 28/02/2009;0 +2116;5;NN1;SUPPLEMENT NEONATOLOGIE 1;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2117;5;NN2;SUPPLEMENT NEONATOLOGIE 2;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2118;5;NN3;SUPPLEMENT NEONATOLOGIE 3;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2119;99;SDC;SUPPLEMENT DEFIBRILLATEUR;0;9;10;1;13;0;;0;Z;22;0;0;;;0 +2120;5;DTC;DIFFERENTIEL TARIFAIRE CLINIQUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2121;5;D01;HEMODIALYSE EN CENTRE OU EN UNITE DE DIALYSE MEDICALISEE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 29/02/2008;0 +2122;5;D02;AUTODIALYSE SIMPLE OU ASSISTEE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 29/02/2008;0 +2142;5;D19;FORFAIT D ENTRAINEMENT A DIALYSE PERITONEALE CONTINUE AMBULATOIRE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 01/03/2013;0 +2143;5;D20;FF D ENTRAINEMENT A LA DIALYSE PERITONEALE AUTOMATISEE A DOMICILE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2144;5;D21;FF D ENTRAINEMENT A LA DIALYSE PERITONEALE CONTINUE AMBULATOIRE A DOMICILE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2145;5;D22;FORFAIT DE DIALYSE PERITONEALE 
AUTOMATISE POUR HOSPITALISATION DE 3 A 6 JOURS;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2146;5;D23;FORFAIT DE DIALYSE PERITONEALE CONTINUE AMBULATOIRE POUR HOSPITALISATION DE 3 A 6 JOURS;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2147;99;D24;FORFAIT D'ENTRAINEMENT A L'HEMODIALYSE EN UNITE DE DIALYSE MEDICALISEE;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2150;5;VDE;VIDEOCAPSULE;0;0;10;0;;0;;1;T;21;0;10;;;0 +2151;5;REA;SUPPLEMENT REANIMATION;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2152;5;SRC;SUPPLEMENT SURVEILLANCE CONTINUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2153;5;STF;FORFAIT SOINS INTENSIFS;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2154;5;REP;REANIMATION PEDIATRIQUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2155;5;SE1;FORFAIT ENVIRONNEMENT HOSPITALIER 1;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2156;5;SE2;FORFAIT ENVIRONNEMENT HOSPITALIER 2;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2157;5;SE3;FORFAIT ENVIRONNEMENT HOSPITALIER 3;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2158;5;SE4;FORFAIT ENVIRONNEMENT HOSPITALIER 4;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2159;5;FSD;FORFAIT DE SECURITE DERMATOLOGIQUE;1;0;10;0;;0;;1;T;20;0;49;;;0 +2160;99;MGS;MISSION D'INTERET GENERAL SSR;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2161;5;MGC;FORFAIT MISSION D INTERET GENERAL D AIDE A LA CONTRACTUALISATION;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2162;5;FHT;FORFAIT HAUTE TECHNICITE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2163;5;DIP;SUPPLEMENT JOURNALIER DIALYSE PERITONEALE;0;0;10;1;13;0;;0;Z;22;0;0;;;0 +2164;5;APE;ADMINISTRATION DE PRODUITS ET PRESTATIONS EN ENVIRONNEMENT HOSPITALIER;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2165;99;FPI;FORFAIT PRESTATION INTERMEDIAIRE;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2166;99;AP2;ADMINISTRATION DE MEDICAMENTS EN ENVIRONNEMENT HOSPITALIER;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2167;99;SE5;FORFAIT ENVIRONNEMENT HOSPITALIER 5;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2168;99;SE6;FORFAIT ENVIRONNEMENT HOSPITALIER 6;0;9;10;0;;0;;0;Z;22;0;10;;;0 +2170;99;FIP;FIR ETS PRIVES;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2171;99;DTA;DIFFERENTIEL TARIFAIRE AME;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2172;99;DTM;DIFFERENTIEL TARIFAIRE MIGRANTS;0;9;10;0;;0;;0;Z;22;0;0;;;0 
+2173;99;DPC;DIFFERENTIEL PSY REGLEMENTAIRE;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2174;99;DPA;DIFFERENTIEL PSY AME;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2175;99;DPM;DIFFERENTIEL PSY MIGRANTS;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2176;99;DSC;DIFFERENTIEL SSR REGLEMENTAIRE;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2177;99;DSA;DIFFERENTIEL SSR AME;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2178;99;DSM;DIFFERENTIEL SSR MIGRANTS;0;9;10;0;;0;;0;Z;22;0;0;;;0 +2181;5;PO1;PRELEVEMENT D ORGANE 1;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2182;5;PO2;PRELEVEMENT D ORGANE 2;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2183;5;PO3;PRELEVEMENT D ORGANE 3;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2184;5;CPO;COORDINATION PRELEVEMENT D ORGANES;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2185;5;PO4;PRELEVEMENT D ORGANE 4;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2186;5;PO5;PRELEVEMENT D'ORGANE 5;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2187;5;PO6;PRELEVEMENT D'ORGANE 6;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2188;5;PO7;PRELEVEMENT D'ORGANE 7;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2189;5;PO8;PRELEVEMENT D'ORGANE 8;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2190;5;PO9;PRELEVEMENT D'ORGANE 9;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2191;5;POA;PRELEVEMENT D'ORGANE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2195;5;ANT;SUPPLEMENT ANTEPARTUM;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2196;5;RAP;SUPPLEMENT RADIOTHERAPIE PEDIATRIQUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2201;6;;BUDGET GLOBAL;0;0;10;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +2202;6;;MEDICALISATION DES PERSONNES AGEES;0;0;10;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +2203;6;;SSAD : SERVICE DE SOINS A DOMICILE;0;0;10;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +2204;6;;CAMSP: CENTRE ACTION MEDICO-SOCIALE PRECOCE;0;0;10;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +2334;5;FSE/SNS;SEANCE D HEMODIALYSE;0;0;10;0;;0;;0;Z;;0;0;;FSE fermé le 28/02/2005;0 +2335;5;FP;FORFAIT DE PANSEMENT;0;0;10;0;;0;;0;Z;21;0;0;;;0 +2336;5;SNS;FORFAIT POUR CONSULTATION EN CENTRE MEDICO-PSYCHO PEDAGOGIQUE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2337;5;SD;SEANCE DE DIAGNOSTIC;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2338;5;FFM;FORFAIT PETIT 
MATERIEL;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2339;5;FS/SNS;AUTRES FORFAITS DIVERS (Y COMPRIS NUTRITION ENTERALE A DOMICILE);0;0;10;0;;0;;0;Z;;0;0;;;0 +2341;5;SFC;SUPPLEMENT AU FORFAIT CHIMIOTHERAPIE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2342;5;RGD;FORFAIT POUR GARDE DE DEBUT DE NUIT EN ETABLIS. PRIVE;0;0;10;0;;0;;1;Z;20;0;47;;;0 +2343;5;RGN;FORFAIT POUR GARDE DE NUIT OU SAMEDI APRES MIDI EN ETABLIS. PRIVE;0;0;10;0;;0;;1;Z;20;0;47;;;0 +2344;5;FPG;FORFAIT DE GARDE NUIT ET FERIE EN ETABLIS. PRIVE;0;0;10;0;;0;;1;Z;20;0;47;;;0 +2345;5;RAN;FORFAIT POUR ASTREINTE DE DEBUT DE NUIT EN ETABLIS. PRIVE;0;0;10;0;;0;;1;Z;20;0;47;;;0 +2346;5;RAG;FORFAIT POUR ASTREINTE DE NUIT OU SAMEDI APRES MIDI EN ETABLIS. PRIVE;0;0;10;0;;0;;1;Z;20;0;47;;;0 +2347;5;FPA;FORFAIT D'ASTREINTE NUIT ET FERIE EN ETABLIS. PRIVE;0;0;10;0;;0;;1;Z;20;0;47;;;0 +2351;5;FTN;FORFAIT TECHNIQUE NORMAL IRMN -SCANNERS;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2352;5;FTR;FORFAIT TECHNIQUE REDUIT IRMN -SCANNERS;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2353;5;FTS;FORFAIT TECHNIQUE SCANNER (SPP expo amiante);1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2354;5;FTG;FORFAIT TECHNIQUE TOMOGRAPHIE;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2355;5;N01;FORFAIT CONCOMMABLE MEDECINE NUCLEAIRE 01;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2356;5;N02;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 02;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2357;5;N03;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 03;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2358;5;N04;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 04;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2359;5;N05;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 05;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2360;5;N06;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 06;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2361;5;N07;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 07;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2362;5;N08;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 08;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2363;5;N09;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 09;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2364;5;N10;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 10;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2365;5;N11;FORFAIT CONSOMMABLE 
MEDECINE NUCLEAIRE 11;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2366;5;N12;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 12;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2367;5;N13;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 13;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +2368;5;N14;FORFAIT CONSOMMABLE MEDECINE NUCLEAIRE 14;1;1;10;0;;0;1;0;Z;22;0;0;;;0 +1906;99;NFP;NOUVEAU FORFAIT PEDIATRIQUE;1;9;99;1;5;0;;1;C;20;0;1;1;;0 +1907;99;PRX;REMUNERATION DES SOINS DE PROXIMITE;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1908;99;SP2;FORFAIT SORTIE PRECOCE ET TEST GUTHRIE;1;9;99;0;;1;;1;Z;20;0;1;1;;0 +1909;99;SP1;FORFAIT SORTIE PRECOCE;1;9;99;0;;1;;1;Z;20;0;1;1;;0 +1910;99;PPS;Plan personnalisé de santé;1;9;99;0;;0;;1;Z;20;0;46;;;0 +1911;1;SF;ACTES DES SAGES-FEMMES;1;1;;0;;1;1;1;Z;20;0;33;1;;0 +1912;1;NA;HONORAIRES NON VENTILABLES INDIVIDUALISES;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1913;1;MM;MAJORATION MILIEU DE NUIT;1;1;1;1;1;0;1;1;Z;20;0;70;1;;0 +1914;1;FPE;FORFAIT PEDIATRIQUE;1;1;;1;5;0;;1;Z;20;0;1;1;fermé le 31/12/2017;0 +1915;1;ASR;FORFAIT ASTREINTE PROF;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1916;1;RPG;REMUNERATION POUR GARDE ETS PRIVES;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1917;1;RPA;REMUNERATION POUR ASTREINTE ETS PRIVES;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1918;1;MU;MAJORATION D'URGENCE;1;0;1;1;3;0;1;1;C;20;0;1;1;;0 +1919;1;;DEPENSES DE MEDECINE FORFAITAIRE (OMNIPRATICIENS ET AUXILIAIRES MEDICAUX);1;0;1;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +1920;99;CIS;CONTRAT DEMOGRAPHIQUE SAGE-FEMME;1;9;99;0;;0;;1;Z;;0;47;;;0 +1921;1;TDR;TEST DE DIAGNOSTIC RAPIDE;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1922;99;CG;CONSULTATION;1;9;99;0;;1;;1;Z;20;0;1;1;fermé le 16/04/2013;0 +1923;1;SP;EXAMEN DE SUIVI POST NATAL;1;1;;0;;1;;1;Z;20;0;1;1;;0 +1924;1;VGM;REMUNERATION VACCINATION GRIPPE A MEDECIN;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1925;1;VMR;REMUNERATION VACCINATION GRIPPE A PAR MEDECIN RETRAITE ET SALARIES HORS OBLIGATIONS;1;0;1;0;;0;;0;Z;20;0;0;;;0 +1926;99;VAC;ACTE DE VACCINATION GRIPPE A/H1N1;1;9;99;0;;0;;1;T;20;0;1;;fermé le 30/09/2010;0 +1931;1;MNP;MAJORATION NOURRISSON 
PEDIATRE;1;1;;1;17;0;;1;Z;20;0;1;1;fermé le 31/12/2017;0 +1932;1;MNO;MAJORATION NOURRISSON GENERALISTE;1;1;;1;17;0;;1;Z;20;0;1;1;fermé le 31/12/2017;0 +1933;1;CRN;MAJORATION CONSULTATION REGULEE DE NUIT;1;0;1;1;1;0;;1;Z;20;0;1;1;;0 +1934;1;CRM;MAJORATION CONSULTATION REGULEE MILIEU DE NUIT;1;0;1;1;1;0;;1;Z;20;0;1;1;;0 +1935;1;CRD;MAJORATION CONSULTATION REGULEE DIMANCHE, JOURS FERIES ET ASSIMILES;1;0;1;1;2;0;;1;Z;20;0;1;1;;0 +1936;1;VRN;MAJORATION VISITE REGULEE DE NUIT;1;0;1;1;1;0;;1;Z;20;0;1;1;;0 +1937;1;VRM;MAJORATION VISITE REGULEE MILIEU DE NUIT;1;0;1;1;1;0;;1;Z;20;0;1;1;;0 +1938;1;VRD;MAJORATION VISITE REGULEE DE DIMANCHE, JOURS FERIES ET ASSIMILES;1;0;1;1;2;0;;1;Z;20;0;1;1;;0 +1939;99;MSF;MAJORATION SAGE-FEMME;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1940;99;DSP;FORFAIT SORTIE PRECOCE;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1941;1;CRS;MAJORATION CONSULTATION REGULEE SAMEDI APRES MIDI;1;0;1;1;2;0;;1;Z;20;0;1;1;;0 +1942;1;VRS;MAJORATION VISITE REGULEE SAMEDI APRES MIDI;1;0;1;1;2;0;;1;Z;20;0;1;1;;0 +1943;99;MUT;MAJORATION URGENCE MT;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1944;99;MCU;MAJORATION CORRESPONDANT URGENCE;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1945;99;MRT;MAJORATION MEDECIN TRAITANT REGULATION;1;9;99;1;22;0;;1;C;20;0;1;1;;0 +1951;1;PFH;PARTICIPATION FORFAITAIRE HORS TIERS PAYANT;1;2;;1;16;0;;1;Z;20;0;54;;;0 +1952;1;PFT;PARTICIPATION FORFAITAIRE TIERS PAYANT;1;2;;1;16;0;;1;Z;20;0;54;;;0 +1954;5;PAE;PARTICIPATION ASSURE CONSULTATIONS ET SOINS EXTERNES (CMU + AME);1;0;;1;16;0;;0;Z;20;0;0;;;0 +1955;1;PAL;PARTICIPATION ASSURE CONSULTATIONS ET SOINS EXTERNES (REGIME LOCAL);1;0;;1;16;0;;0;Z;20;0;0;;;0 +1956;1;PAP;PARTICIPATION ASSURE EN AMBULATOIRE;1;0;1;1;16;0;;1;Z;20;0;56;;;0 +1957;99;TMT;MAJORATION HORS PARCOURS DE SOINS;1;9;99;1;24;0;;1;Z;20;0;54;;;0 +1960;99;SGA;SUPPLEMENT DEROGATOIRE SG SUR ACTE PROFESSIONNEL NON REMBOURSABLE (CNMSS);0;9;99;0;;1;;1;Z;20;0;1;;;0 +1961;99;DAP;SUPPLEMENT DEROGATOIRE SG SUR ACTE PROFESSIONNEL REMBOURSABLE (CNMSS);0;9;99;1;29;0;;1;Z;20;0;1;;;0 
+1971;1;FRH;FRANCHISE HORS TIERS PAYANT SUR MEDICAMENT;1;1;;1;16;0;;1;Z;20;0;53;;;0 +1972;1;FRT;FRANCHISE TIERS PAYANT SUR MEDICAMENT;1;1;;1;16;0;;1;Z;20;0;53;;;0 +1973;1;FRH;FRANCHISE HORS TIERS PAYANT SUR TRANSPORT;1;1;;1;16;0;;1;Z;20;0;52;;;0 +9743;12;HAT;AIDES A LA DEAMBULATION ET AU TRANSPORT;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9744;12;HAU;AUTRES TYPES D AIDES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9751;12;SPL;FOURNITURES ET ACCESSOIRES NON REMBOURSABLES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9752;12;HAD;GARDES MALADES A DOMICILE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9761;12;SOR;AIDES FINANCIERES INDIVIDUELLES ORTHODONTIE;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9762;12;SEP;AIDES FINANCIERES INDIVIDUELLES PHARMACIE/LPP;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9763;12;DIL;AIDES FINANCIERES A CARACTERE SOCIAL AFFECTEES AU LOGEMENT;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9764;12;DIB;AIDES FINANCIERES A CARACTERE SOCIAL AFFECTEES AUX BESOINS ALIMENTAIRES ET VESTIMENTAIRES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9765;12;DIS;AIDES FINANCIERES A CARACTERE SOCIAL AFFECTEES AUX FRAIS DE SCOLARISATION;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9766;12;DIT;AIDES FINANCIERES A CARACTERE SOCIAL AFFECTEES AUX REGLEMENTS D'IMPOTS TAXES ET PRIMES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9767;12;DIA;AIDES FINANCIERES A CARACTERE SOCIAL D'ATTENTE DE VERSEMENT DE REVENUS DE SUBSTITUTION;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9768;12;DIO;AIDES FINANCIERES A CARACTERE SOCIAL AFFECTEES AUX FRAIS D'OBSEQUES;0;0;19;0;;0;;0;Z;21;0;0;;;0 +9413;13;;HYGIENE BUCCO-DENTAIRE N92 (SCELLEMENT DE DEUX MOLAIRES);0;0;20;0;;0;;1;Z;;0;20;;Saisie manuelle Qualiflux;0 +9414;13;;HYGIENE BUCCO-DENTAIRE N93 (SCELLEMENT DE TROIS MOLAIRES);0;0;20;0;;0;;1;Z;;0;20;;Saisie manuelle Qualiflux;0 +9415;13;;HYGIENE BUCCO-DENTAIRE N94 (SCELLEMENT DE QUATRE MOLAIRES);0;0;20;0;;0;;1;Z;;0;20;;Saisie manuelle Qualiflux;0 +9421;13;BDC;PREVENTION BUCCO-DENTAIRE: CONSULTATION;0;0;20;0;;1;;1;Z;20;0;20;;;0 +9422;13;BR2;PREVENTION BUCCO-DENTAIRE: RADIO DEUX CLICHES;0;0;20;0;;1;;1;Z;20;0;20;;;0 +9423;13;BR4;PREVENTION BUCCO-DENTAIRE: 
RADIO QUATRE CLICHES;0;0;20;0;;1;;1;Z;20;0;20;;;0 +9424;13;RIN;PREVENTION BUCCO-DENTAIRE: RADIO EN IMAGERIE NUMERISEE;0;0;20;0;;0;;1;Z;;0;20;;fermé le 28/02/2005;0 +9425;12;;TM POUR DC (MSA);0;0;19;0;;0;;1;Z;;0;22;;Specifique NTEIR;0 +9426;12;;TM POUR SC(MSA);0;0;19;0;;0;;1;Z;;0;22;;Specifique NTEIR;0 +9427;12;;TM POUR Z(MSA);0;0;19;0;;0;;1;Z;;0;22;;Specifique NTEIR;0 +9429;13;BD2;CAMPAGNE BUCCO DENTAIRE MOCALES;0;0;20;0;;0;;1;Z;20;0;20;;;0 +9430;99;;TM DE L'ACTE DE VACCINATION GRIPPE A/H1N1;0;9;99;0;;0;;1;Z;;0;22;;fermé le 30/09/2010;0 +9431;13;PES;PREVENTION ENTRETIEN DE SANTE;0;0;20;0;;0;;1;Z;20;0;20;;;0 +9432;13;;TM DES INDEMNITES DE DEPLACEMENTS ID et MD;0;0;20;0;;0;;1;Z;;0;22;;;0 +9433;99;;TM DE LA RETINOPATHIE DIABETIQUE;0;9;99;0;;0;;1;Z;;0;22;;;0 +9434;99;DCC;ACTE DE DEPISTAGE DU CANCER COLORECTAL;0;9;99;0;;0;;1;Z;20;0;4;;;0 +9511;13;BDS/EDS;EXAMEN ET BILAN DE SANTE;0;0;20;0;;0;;0;Z;;0;0;;;0 +9512;13;BD5;AUTRES ACTIONS COLLECTIVES DE PREVENTION;0;0;20;0;;0;;0;Z;21;0;0;;;0 +9521;13;PDI;ACTES DE PREVENTION;1;0;20;0;;0;;0;Z;21;0;0;;;0 +9566;13;TNS;TRAITEMENT NICOTINIQUE DE SUBSTITUTION;1;0;20;0;;1;;1;Z;21;0;30;;fermé le 31/12/2018;0 +9567;13;RSO;REMUNERATION ADHESION SOPHIA (PREVENTION);0;0;20;0;;0;;1;Z;20;0;47;;La prestation 9567 est remplace par 1142;0 +9568;13;RSR;REMUNERATION RENOUVELLEMENT SOPHIA (PREVENTION);0;0;20;0;;0;;1;Z;20;0;47;;La prestation 9568 est remplace par 1143;0 +9569;99;RAD;RETOUR DOM. 
INSUFFISANT CARDIAQUE;0;9;99;0;;1;;1;Z;20;0;3;;;0 +9570;99;BPC;BRONCHO-PNEUMOPATHIE CHRONIQUE OBSTRUCTIVE;0;9;99;0;;0;;1;Z;20;1;3;;;0 +9601;12;MCP;MUTUELLE CHAMBRE PARTICULIERE (CAVIMAC);0;0;19;0;;0;;0;Z;;0;0;;fermé le 13/04/2003;0 +9602;12;MFM;FORFAIT DE SOINS INFIRMIERS MUTUELLE SECTEUR MEDICAL (CAVIMAC);0;0;19;0;;0;;0;Z;;0;0;;fermé le 13/04/2003;0 +9603;12;MFR;FORFAIT DE SOINS INFIRMIERS MUTUELLE SECTEUR REPOS CONVALESCENCE (CAVIMAC);0;0;19;0;;0;;0;Z;;0;0;;fermé le 13/04/2003;0 +9604;12;MLS;MUTUELLE LONG SEJOUR;0;0;19;0;;0;;0;Z;;0;0;;fermé le 13/04/2003;0 +9701;12;SSU;COMPLEMENT D ACTION SOCIALE APPAREIL DE SURDITE (CLERCS ET EMPLOYES DE NOTAIRES, PORT AUTONOME DE BORDEAUX);0;0;19;0;;0;;1;Z;21;0;19;;;0 +9702;12;;COMPLEMENT D ACTION SOCIALE BRIDGE (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;0;Z;;0;0;;Autre regime NEC;0 +9703;12;SDO;COMPLEMENT D ACTION SOCIALE DENTAIRE (CLERCS ET EMPLOYES DE NOTAIRES);0;0;19;0;;0;;1;Z;20;0;49;;;0 +9805;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9806;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9807;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9808;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9809;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9810;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9811;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9812;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9813;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9814;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9815;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9816;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 
+9817;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9818;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9819;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9820;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9821;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9822;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9823;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9824;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9825;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9826;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9827;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9828;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9829;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9830;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9831;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9832;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9833;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9834;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9835;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9836;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9837;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9838;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique NTEIR;0 +9839;0;;CODES MIS A LA DISPOSITION DE LA SNCF;0;0;;0;;0;;0;Z;;0;0;;Specifique 
NTEIR;0 +3351;2;PHU;MEDICAMENT AVEC UNE AUTORISATION TEMPORAIRE D'UTILISATION;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3352;2;PHM;PREPARATION MAGISTRALE HOSPITALIRE;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3353;2;PHP;PREPARATION HOSPITALIERE;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3354;2;PHI;MEDICAMENT AVEC AUTORISATION D'IMPORTATION;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3355;2;MAR;MARGE FORFAITAIRE (MEDICAMENTS HOSPITALIERS);1;0;5;0;;0;;1;Z;21;0;12;;;0 +3356;2;PHD;PHARMACIE HOSPITALIERE DEROGATOIRE;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3357;2;PHT;PHARMACIE HOSPITALIERE MMH;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3361;2;GPN;GARDE PHARMACIE NUIT;1;0;5;0;;0;;0;Z;21;0;0;;;0 +3362;2;GPF;GARDE PHARMACIE FERIE;1;0;5;0;;0;;0;Z;21;0;0;;;0 +3363;2;GPD;GARDE PHARMACIE DIMANCHE;1;0;5;0;;0;;0;Z;21;0;0;;;0 +3364;99;HDR;HONORAIRE MEDICAMENT REMBOURSABLE;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3365;99;HDA;HONORAIRE DISPENSATION EN LIEN AVEC AGE;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3366;99;HDE;HONORAIRE DE DISPENSATION MEDICAMENTS SPECIFIQUES;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3374;99;KGP;KIT ANTI-GRIPPE;0;9;99;0;;1;;1;Z;21;0;12;;fermé le 31/05/2010;0 +3375;3;;REMUNERATION PHARMACIENS POUR VACCINS H1N1;0;0;12;0;;1;;1;Z;;0;12;;Pas d information sur la prestation;0 +3378;99;CTR;CONTESTATION ROSP TRANSMISSION RPPS;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3379;99;HDS;Honoraire de dispensation spécifique vaccins anti grippaux Hémisphère Sud;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3380;99;HC;HONO DISP COMP;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3381;2;PPI;PREPARATION PHARMACEUTIQUE INDIVIDUALISEE (ALLERGENES);1;0;5;0;;1;;1;Z;21;0;5;;;0 +3382;99;PDP;PRISE EN CHARGE DEROGATOIRE PHARMACIE;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3383;99;HD1;HONO DISP 1;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3384;99;HD2;HONO DISP 2;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3385;99;HD4;HONO DISP 4;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3386;99;HD7;HONO DISP 7;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3387;99;HG1;HONO DISP GC 1;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3388;99;HG2;HONO DISP GC 2;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3389;99;HG4;HONO DISP GC 4;1;9;99;0;;0;;1;Z;21;0;12;;;0 
+3390;99;HG7;HONO DISP GC 7;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3391;99;ROP;REMU OBJECTIF - PHARMACIEN;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3392;99;COP;REMU OBJECTIF - PHARMACIEN COMPLEMENT;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3393;99;AVK;ROSP AVK;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3394;99;CVK;ROSP AVK COMPLEMENT;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3395;99;PPH;Plan personnalisé de santé pharmacie;1;9;99;0;;0;;1;Z;21;0;12;;;0 +3396;99;AHM;ROSP ASTHME;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3397;99;CHM;ROSP ASTHME COMPLEMENT;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3398;99;RTR;ROSP TRANSMISSION RPPS;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3399;3;;REMUNERATION PHARMACIENS POUR VACCINS H1N1 - REGULARISATION COMPTABLE ET SAISIE MANUELLE;0;0;12;0;;0;;1;Z;;0;12;;Saisie manuelle Qualiflux;0 +3411;2;SNG;SANG,PLASMA ET LEURS DERIVES;1;0;5;0;;1;;1;Z;21;0;12;;;0 +3412;2;TSG;TRANSPORT DU PRODUIT;1;0;5;0;;0;;1;Z;21;0;12;;;0 +3413;2;LAI;LAIT HUMAIN;1;0;5;0;;0;;1;Z;21;0;12;;;0 +3414;2;HUM;AUTRES PRODUITS D ORIGINE HUMAINE;1;0;5;0;;0;;1;Z;21;0;12;;;0 +3511;2;AAR;APPAREILS D ASSISTANCE RESPIRATOIRE,OXYGENOTHERAPIE A DOMICILE;1;0;6;0;;1;;1;Z;21;0;6;;;0 +3512;2;AAD;AUTRES MATERIELS POUR TRAITEMENTS A DOMICILE (CHAP. 1);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3513;2;MAC;MATERIELS ET APPAREILS DE CONTENTION ET DE MAINTIEN (CHAP. 2);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3514;2;MAD;MATERIELS ET APPAREILS POUR TRAITEMENTS DIVERS (CHAP. 3);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3515;2;PAN;ARTICLES DE PANSEMENTS (CHAP. 4);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3516;2;GLU;NUTRIMENTS POUR INTOLERANTS AU GLUTEN (CHAP. 3);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3517;2;NUT;ALIMENTS DESTINES A DES FINS MEDICALES;1;0;5;0;;1;;1;Z;21;0;12;;;0 +3518;2;ARO;APPAREIL GENERATEUR D AEROSOL;1;0;6;0;;1;;1;Z;21;0;6;;;0 +3521;2;PA;ORTHESES (PETIT APPAREILLAGE) (CHAP. 
1);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3522;2;DVO;DIVERS ORTHESES;1;0;6;0;;1;;1;Z;21;0;6;;;0 +3523;99;OME;FORFAIT MONTURE MOINS DE 18 ANS CMU;1;9;99;0;;1;;1;Z;21;0;6;;;0 +3524;99;OVA;FORFAIT OPTIQUE ENFANT -A- MULTIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3525;99;OVB;FORFAIT OPTIQUE ENFANT -B- MULTIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3526;99;OP7;FORFAIT OPTIQUE -N° 6 MULTIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3527;99;OV1;FORFAIT OPTIQUE -ENFANT-N° 1 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3314;2;MX1/PH1;HORMONE DE CROISSANCE;1;0;5;0;;1;;1;Z;;0;5;;MX1 : fermé le 31/03/2003;0 +3315;2;MX4/PH7;MEDICAMENTS ANTIRETROVIRAUX;1;0;5;0;;1;;1;Z;;0;5;;MX4 : fermé le 31/03/2003;0 +3316;2;MX7/PH7;MEDICAMENTS D EXCEPTION;1;0;5;0;;1;;1;Z;;0;5;;MX7 : fermé le 31/03/2003;0 +3317;2;PHH;PHARMACIE HOSPITALIERE A 100%;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3318;2;PHS;PHARMACIE HOSPITALIERE A 65%;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3319;2;PHQ;PHARMACIE HOSPITALIERE;1;0;5;0;;0;;1;Z;21;0;5;;;0 +3320;2;PH8;PHARMACIE HOSPITALIERE EN SUS DU GHS;1;0;5;0;;0;;0;Z;22;0;0;;;0 +3321;2;PHA;FORFAIT PHARMACEUTIQUE EN MATERNITE;1;0;5;1;21;0;;0;Z;21;0;0;;;0 +3322;2;UPH;MAJORATION POUR ACHAT HORS HEURES OUVRABLES;1;0;5;1;21;0;;1;Z;21;0;5;;;0 +3323;2;CPH;COPIE D ORDONNANCE;1;0;5;1;21;0;;1;Z;21;0;5;;fermé le 30/06/2015;0 +3324;2;EMI;ECART MEDICAMENT INDEMNISABLE;1;0;5;1;21;0;;0;Z;22;0;0;;;0 +3325;2;MPI;MAJORATION PHARMACIE DES ILES;1;0;5;1;21;0;;1;Z;21;0;12;;;0 +3326;2;PMR;PREPARATION MAGISTRALE REMBOURSABLE;1;0;5;0;;1;;1;Z;21;0;5;;;0 +3327;2;PMH;PREPARATION MAGISTRALE HOMEOPATHIQUE;1;0;5;0;;1;;1;Z;21;0;5;;;0 +3328;2;MHU;MEDICAMENTS HOMEOPATHIQUES UNITAIRES;1;0;5;0;;1;;1;Z;21;0;5;;;0 +3329;2;FMV;FORFAIT MEDICAMENT IVG VILLE;1;0;5;0;;1;;1;Z;20;0;18;;;0 +3330;99;ERI;ECART INDEMNISABLE RETROCESSION;1;9;99;0;;0;;0;Z;21;0;0;;;0 +3331;2;PH7/ANTI GRIPPE;VACCIN ANTI-GRIPPE;1;1;;0;;1;;1;Z;;0;5;;;0 +3332;2;PH7/ROR;VACCIN ROR;1;1;;0;;1;;1;Z;;0;5;;;0 +3333;2;PH7/ANTI PALUDEEN;ANTI PALUDEEN;1;1;;0;;0;;1;Z;;0;5;;;0 
+3334;99;GS1;Vaccin anti grippe Hémisphère Sud VAXIGRIP HS;1;9;99;0;;1;;1;Z;21;0;5;;;0 +3335;99;GS2;Vaccin anti grippe Hémisphère Sud FLUARIX HS;1;9;99;0;;1;;1;Z;21;0;5;;;0 +3336;99;PHX;PHARMACIE SOUS ATU SEJOUR;1;9;99;0;;0;;1;Z;22;0;5;;;0 +3337;99;FFC;FORFAIT FAUSSE COUCHE VILLE;1;9;99;0;;1;;1;Z;20;0;46;;;0 +3338;99;FFV;FORFAIT FAUSSE COUCHE VILLE SANS ÉCHOGRAPHIE;1;9;99;0;;1;;1;Z;20;0;46;;;0 +3339;99;FEF;FORFAIT FAUSSE COUCHE ETABLISSEMENT AVEC ECHOGRAPHIE;1;9;99;0;;1;;1;Z;20;0;46;;;0 +3340;99;FFE;FORFAIT FAUSSE COUCHE ETABLISSEMENT SANS ECHOGRAPHIE;1;9;99;0;;1;;1;Z;20;0;46;;;0 +3341;2;PH2;PHARMACIE 15%;1;0;5;0;;1;;1;Z;21;0;28;;;0 +3342;2;PM2;PREPARATION MAGISTRALE ALLOPATHIQUE 15 %;1;0;5;0;;1;;1;Z;21;0;5;;;0 +3343;2;PM4;PREPARATION MAGISTRALE ALLOPATHIQUE;1;0;5;0;;1;;1;Z;21;0;5;;;0 +4112;1;THR;FORFAIT DE SURVEILLANCE MEDICALE REDUIT 2EME HANDICAP;1;0;1;0;;1;;1;C;20;1;1;;fermé le 31/12/1999;0 +4113;1;KTH;PRATIQUES MEDICALES COMPLEMENTAIRES COTEES EN K;1;0;1;0;;1;;1;T;20;1;1;1;;0 +4114;1;CST;COMPLEMENT SURVEILLANCE THERMALE;1;0;1;1;11;0;;1;Z;20;1;1;1;fermé le 01/01/2015;0 +4121;3;FTH;FORFAITS EN ETABLISSEMENT (DATE FIN 12/98);1;0;8;0;;0;;0;Z;;1;0;;fermé le 31/12/1999;0 +4122;3;CTH;SUPPLEMENTS THERMAUX 1ER HANDICAP (DATE FIN 12/98);1;0;8;0;;0;;0;Z;;1;0;;fermé le 31/12/1999;0 +4123;3;THS;SUPPLEMENTS THERMAUX 2EME HANDICAP (DATE FIN 12/98);1;0;8;0;;0;;0;Z;;1;0;;fermé le 31/12/1999;0 +4131;3;TTH;FRAIS DE TRANSPORT - CURES THERMALES;1;1;;0;;0;;0;Z;21;1;0;;;0 +4132;3;HTH;FRAIS D HOTEL - CURES THERMALES;1;0;7;0;;0;;0;Z;21;1;0;;;0 +4141;3;TH1;FORFAIT THERMAL 1;1;0;8;0;;0;;1;Z;21;1;8;;;0 +4142;3;TH2;FORFAIT THERMAL 2 AVEC KINESITEHRAPIE;1;0;8;0;;0;;1;Z;21;1;8;;;0 +4143;3;TH3;FORFAIT THERMAL 2EME ORIENTATION;1;0;8;0;;0;;1;Z;21;1;8;;;0 +4144;3;TH4;FORFAIT THERMAL 3 AVEC 9 SEANCES KINESITHERAPIE;1;0;8;0;;0;;1;Z;21;1;8;;;0 +4145;3;TH5;FORFAIT THERMAL 72 SEANCES AVEC KINE;1;0;8;0;;0;;1;Z;21;0;8;;;0 +4151;3;MK1;FORFAIT THERMAL 18 SEANCES COLLECTIVES;1;0;8;0;;0;;1;Z;21;1;8;;;0 
+4152;3;MK2;FORFAIT THERMAL 18 SEANCES INDIVIDUELLES;1;0;8;0;;0;;1;Z;21;1;8;;;0 +4153;3;MK3;FORFAIT THERMAL 9 SEANCES COLLECTIVES;1;0;8;0;;0;;1;Z;21;1;8;;;0 +4154;3;MK4;FORFAIT THERMAL 9 SEANCES INDIVIDUELLES;1;0;8;0;;0;;1;Z;21;1;8;;;0 +4155;99;FSB;THERMAL-SEVRAGE DES PSYCHOTROPES;1;9;99;0;;0;;1;C;21;1;8;;;0 +4156;99;FKS;THERMAL-SUITE CANCER DU SEIN;1;9;99;0;;0;;1;C;21;1;8;;;0 +4157;99;TC1;THERMAL-TROUBLE COMPORTEMENT 72 SEANCES;1;9;99;0;;0;;1;C;21;1;8;;;0 +4158;99;TC2;THERMAL-TROUBLE COMPORTEMENT 108 SEANCES;1;9;99;0;;0;;1;C;21;1;8;;;0 +4204;99;FUE;FORFAIT TRANSPORT URGENCE EXTRAMUROS CPAM MEUSE;1;9;99;0;;0;;1;Z;21;1;93;;;0 +4205;99;FUI;FORFAIT TRANSPORT URGENCE INTRAMUROS CPAM MEUSE;1;9;99;0;;0;;1;Z;21;1;93;;;0 +4206;99;AFG;PRESTATION FIN DE GARDE AMBULANCE;1;9;99;0;;0;;1;Z;21;1;93;;;0 +4207;99;FTU;FORFAIT TRANSPORT D'URGENCE EXPERIMENTATION CPAM AUDE;1;9;99;0;;0;;1;Z;21;1;93;;;0 +4208;99;FUS;FORFAIT D'URGENCE SUR APPEL DU SAMU EXPERIMENTATION CPAM BOUCHES-DU-RHONE;1;9;99;0;;0;;1;Z;21;1;93;;;0 +4209;99;CTU;COMPLEMENT TRANSPORTS D'URGENCE;1;9;99;0;;0;;1;Z;20;0;50;;;0 +4210;99;TXA;Taxi tarif A;1;9;99;0;;0;;1;Z;21;0;92;;;0 +4211;3;SMU;SERVICES MOBILES D URGENCE ET DE REANIMATION (SMUR);1;0;7;0;;0;;1;Z;21;1;23;;;0 +4212;3;ABA;AMBULANCES AGREEES;1;0;7;0;;0;;1;Z;21;1;90;;;0 +4213;3;VSL;VEHICULES SANITAIRES LEGERS (VSL);1;0;7;0;;0;;1;Z;21;1;91;;;0 +4214;3;TXI;TAXIS;1;0;7;0;;0;;1;Z;21;1;92;;;0 +3528;99;OV2;FORFAIT OPTIQUE -ENFANT-N° 2 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3529;99;OV3;FORFAIT OPTIQUE -ENFANT-N° 3 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3530;99;OV4;FORFAIT OPTIQUE -ENFANT-N° 4 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3531;2;OPT;OPTIQUE MEDICALE PROPREMENT DIT;1;0;6;0;;1;;1;Z;;0;6;;;0 +3532;2;LUN/LNE;MONTURE/LUNETTE POUR ENFANT DE - DE 18 ANS CRPCEN (HORS CODAGE LPP);1;0;6;0;;0;;1;Z;;0;6;;;0 +3533;2;VER/OPE;VERRES / VERRES POUR ENFANT<18 ans -CRPCEN- (hors codage LPP);1;0;6;0;;1;;1;Z;21;0;6;;;0 
+3534;2;OPR/ROM/ROV;REPARATION;1;0;6;0;;0;;1;Z;;0;6;;OPR: fermé le 01/01/2000;0 +3535;2;LEN;LENTILLES;1;0;6;0;;1;;1;Z;21;0;6;;;0 +3536;2;OP1;VERRES UNIFOCAUX OP1 (CMU);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3537;2;OP2;VERRES UNIFOCAUX OP2 (CMU);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3538;2;OP3;VERRES UNIFOCAUX OP3 (CMU);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3539;2;OP4;VERRES UNIFOCAUX OP4 (CMU);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3540;99;PAZ;PROTHESE AUDITIVE RAC ZERO;1;9;99;0;;1;;1;T;21;0;6;;;0 +3541;2;PAU/AUA;APPAREILS ELECTRONIQUES DE SURDITE (CHAP 3.);1;0;6;0;;1;;1;Z;;0;6;;;0 +3542;2;PEX;PROTHESES EXTERNES NON ORTHOPEDIQUES (CHAP. 4);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3543;2;POC;PROTHESES OCULAIRES ET FACIALES (CHAP. 5);1;0;6;0;;0;;1;Z;21;0;6;;;0 +3544;2;COR;CHAUSSURES ORTHOPEDIQUES (CHAP. 6);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3545;2;ORP;ORTHOPROTHESES (CHAP 7.);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3546;2;ORC;ACCESSOIRES DE PROTHESES ET D ORTHOPEDIE (CENTRES D APPAR.) (CHAP.8);1;0;6;0;;0;;1;Z;;0;6;;fermé le 01/11/2000;0 +3547;2;AUP;APPAREILS ELECTRONIQUES DE SURDITE (CONSOMMABLES Y.C. PILES);1;0;6;0;;0;;1;Z;21;0;6;;;0 +3548;2;OPC;ORTHOPROTHESES COUTEUSES;1;0;6;0;;0;;1;Z;21;0;6;;;0 +3549;99;PIO;PROCESSEUR POUR IMPLANT OSTE-INTEGRE;1;9;99;0;;1;;1;Z;21;0;6;;;0 +3550;99;SUI;PROTHESE AUDITIVE SUIVI;1;9;99;0;;1;;1;T;21;0;6;;;0 +3551;2;PII;IMPLANT INTERNE (CHAP. 1, 2 ET 3);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3552;2;PME;IMPLANT MU PAR ELECTRICITE (CHAP. 
4);1;0;6;0;;0;;1;Z;21;0;6;;;0 +3553;99;OV5;FORFAIT OPTIQUE -ENFANT-N° 5 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3554;99;OV6;FORFAIT OPTIQUE -ENFANT-N° 6 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3555;99;OV7;FORFAIT OPTIQUE -ENFANT-N° 7 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3556;99;OV8;FORFAIT OPTIQUE -ENFANT-N° 8 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3557;99;OV9;FORFAIT OPTIQUE -ENFANT-N° 9 UNIFOCAUX (CMU);1;9;99;0;;1;;1;Z;21;0;6;;;0 +3561;2;VEH;VEHICULES POUR HANDICAPES PHYSIQUES;1;0;6;0;;1;;1;Z;21;0;6;;;0 +3571;2;FGA;FRAIS DE GESTION APPAREILLAGE;1;0;6;0;;0;;1;Z;;0;6;;fermé le 30/03/2001;0 +3572;2;ETI;ECART TIPS INDEMNISABLE;1;0;6;0;;0;;1;Z;21;0;6;;;0 +3573;2;FED;FOURNITURE ET EQUIPEMENT DEROGATOIRES;1;0;6;0;;0;;0;Z;21;0;0;;;0 +3574;2;PDM;DISPOSITIF MEDICAL (PRISE EN CHARGE EXCEPTIONNELLE);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3575;2;ATL;COMPLEMENT AT 150% LPP;1;0;99;1;27;0;;1;Z;21;0;6;;;0 +3576;3;PPP;PRESTATION PARTICULIERE ET PANDEMIE;0;0;99;0;;0;;0;Z;21;0;0;;;0 +3581;2;OP5;VERRES UNIFOCAUX OP5(CMU);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3582;2;OP6;VERRES UNIFOCAUX OP6(CMU);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3583;2;OPM;MONTURE (CMU);1;0;6;0;;1;;1;Z;21;0;6;;;0 +3591;99;PCD;PRISE EN CHARGE DEROGATOIRE LPP;1;9;99;0;;0;;1;Z;21;0;6;;;0 +3592;99;DLE;PEC EXCEPTIONNELLE DÉPASSEMENT LPP;1;9;99;0;;0;;1;Z;21;0;6;;;0 +3593;99;TSF;TELESURVEILLANCE : FOURNISSEUR DE LA SOLUTION;1;9;99;0;;0;;1;Z;21;0;1;;;0 +3594;99;DLT;PEC EXCEPTIONNELLE DEPASSEMENT LPP TP;1;9;99;0;;0;;1;Z;21;0;6;;;0 +3610;99;SGN;SUPPLEMENT DEROGATOIRE SG SUR PRESTATION PHARMACIE NON REMBOURSABLE (CNMSS);0;9;99;1;29;0;;1;Z;20;0;5;;;0 +3611;99;SGS;SUPPLEMENT DEROGATOIRE SG SUR PRESTATION SANITAIRE NON REMBOURSABLE (CNMSS);0;9;99;0;;1;;1;Z;20;0;6;;;0 +3612;99;DPS;SUPPLEMENT DEROGATOIRE SG SUR PRESTATION SANITAIRE REMBOURSABLE (CNMSS);0;9;99;1;29;0;;1;Z;20;0;6;;;0 +4111;1;STH;FORFAIT DE SURVEILLANCE MEDICALE 1ER HANDICAP;1;0;1;0;;1;;1;C;20;1;1;1;;0 +4316;3;EXP;EXPERTISE;1;0;16;0;;1;;1;Z;20;0;16;;;0 
+4317;3;HMP;HONORAIRE COMITE REGIONAL RECONNAISSANCE Maladie Professionnelle;0;0;16;0;;0;;1;Z;20;0;47;;;0 +4318;3;DPH;DEPLACEMENT COMITE REGIONAL RECONNAISSANCE MP;0;0;16;1;10;0;;1;Z;20;0;7;;;0 +4319;3;DPE;DEPLACEMENT PERSONNE ENTENDUE (CRRMP);0;0;16;1;10;0;;0;Z;21;0;0;;;0 +4320;3;ECP;AVIS SAPITEUR;0;0;16;0;;0;;1;Z;20;0;47;;;0 +4321;3;FUN;FRAIS FUNERAIRES;1;0;12;0;;0;;0;Z;21;1;0;;;0 +4322;3;TRC;TRANSPORT DU CORPS;1;0;12;0;;0;;0;Z;21;1;0;;;0 +4323;3;PDO;INDEMNITE ALLOUEE EN REPARATION DES PREJUDICES EXTRA-PATRIMOMIAUX;1;0;12;0;;0;;0;Z;21;1;0;;;0 +4324;3;ICE;INDEMNITE DE CHANGEMENT D EMPLOI;1;0;12;0;;0;;0;Z;18;0;0;;;0 +4325;3;ICR;INDEMNITE COMPLEMENTAIRE POUR REEDUCATION PROFESSIONNELLE;1;0;12;0;;0;;0;Z;18;0;0;;;0 +4326;3;PPU;PRETIUM PULCHRITUDINIS;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4327;3;PSE;PRETIUM SEXUALE;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4328;3;PAG;PREJUDICE D'AGREMENT;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4329;3;PPD;PREJUDICE PERTE OU DIMINUTION PROMOTION PROFESSIONNELLE;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4330;99;PNC;PREJUDICES EXTRA PATRIMONIAUX - HORS IV;1;9;99;0;;0;;0;Z;21;0;0;;;0 +4331;3;ETR;REMBOURSEMENTS DE SOINS A L ETRANGER (ET1 A ET6 ET ET8 A ET9, ETB ETA, ETH, ETT, ETP, ETX );1;0;12;0;;0;;0;Z;21;0;0;;;0 +4332;3;PPA;PREJUDICE AMIANTE;0;0;12;0;;0;;0;Z;21;0;0;;;0 +4339;3;RES;REMBOURSEMENT DE SOINS;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4341;3;;FRAIS DE TUTELLE;0;0;12;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4342;3;CRF;COTISATIONS A.T. EN CAS DE READAPTATION FONCTIONNELLE;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4343;3;CRP;COTISATIONS A.T. 
EN CAS DE REEDUCATION PROFESSIONNELLE;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4351;3;PFR;PRIME DE FIN DE REEDUCATION;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4352;3;;AUTRES PRESTATIONS DIVERSES;1;0;12;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +4353;3;IPS;INDEMNITE POUR PERTE DE SALAIRE (MALADIE, AT);1;1;;0;;0;;0;Z;21;0;0;;;0 +4359;99;IUS;FORFAIT UTILISATION DES TELESERVICES;0;9;99;0;;0;;1;Z;Z;0;47;;;0 +4360;99;FPT;FORFAIT PARTICIPATION A LA TELETRANSMISSION;0;9;99;0;;0;;1;Z;Z;0;47;;;0 +4361;99;FFS;FACTURATION FEUILLE DE SOINS (POUR INFORMATION);0;9;99;0;;0;;1;Z;Z;0;47;;;0 +4362;3;AST;ASTREINTE MEDECIN (POUR INFORMATION);0;0;12;0;;0;;1;Z;;0;31;;fermé le 31/12/2002;0 +4363;99;FFN;AIDE A LA FACTURATION FEUILLE DES FLUX NON SECURISES;0;9;99;0;;0;;0;Z;Z;0;0;;;0 +4364;99;AMT;AIDE MAINTENANCE TELETRANSMISSION;0;9;99;0;;0;;1;Z;Z;0;47;;;0 +4365;99;APT;AIDE PORTABLE TELETRANSMISSION;0;9;99;0;;0;;1;Z;Z;0;47;;;0 +4366;99;ADT;AIDE DEMARRAGE TELETRANSMISSION;0;9;99;0;;0;;1;Z;Z;0;47;;;0 +4367;99;ARD;AIDE ADHESION RAPIDE DISPOSITIF;0;9;99;0;;0;;1;Z;Z;0;47;;;0 +4368;99;FCS;FORFAIT STRUCTURE CENTRE DE SANTE;0;9;99;0;;0;;0;Z;Z;0;0;;;0 +4369;99;FPS;FORFAIT PROFESSIONNEL DE SANTE CENTRE DE SANTE;0;9;99;0;;0;;0;Z;Z;0;0;;;0 +4370;99;IFT;FORFAIT D'INCITATION FORFAITAIRE A LA NUMERISATION ET A LA TRANSMISSION;0;9;99;0;;0;;0;Z;;0;0;;;0 +4371;3;FSM;FORFAIT DE SOINS MEDICALISES (REGIME DES MINISTRES DES CULTES ET DES MEMBRES CONGR. ET COLL.RELIGIEUSES);1;0;12;0;;0;;0;Z;21;0;0;;;0 +4372;3;FRC;FORFAIT REPOS CONVALESCENCE (REGIME DES MINISTRES DES CULTES ET DES MEMBRES CONGR. 
ET COLL.RELIGIEUSES);1;0;12;0;;0;;0;Z;21;0;0;;;0 +4373;3;EDS;EXAMEN DE SANTE;1;0;12;0;;0;;0;Z;21;0;0;;;0 +4374;99;MDS;MÉCANISME DE COMPENSATION AUX CENTRES DE SANTÉ;1;9;99;0;;0;;0;Z;20;0;0;0;;0 +4375;3;OMJ;AIDE OUTIL DE MISE A JOUR;0;0;12;0;;0;;0;Z;21;0;0;;;0 +4376;3;LTD;LIGNE TELEPHONIQUE DEDIEE;0;0;12;0;;0;;0;Z;21;0;0;;;0 +4377;3;PSM;PIED SUPPORT MATERIEL;0;0;12;0;;0;;0;Z;21;0;0;;;0 +4378;3;SOD;SUPPLEMENT OFFICINE DOM;0;0;12;0;;0;;0;Z;21;0;0;;;0 +4379;99;ADS;AVANCE RÉMUNÉRATION SPÉCIFIQUE CENTRES DE SANTÉ;1;9;99;0;;0;;0;Z;20;0;0;0;;0 +4380;99;SDS;SOLDE RÉMUNÉRATION SPÉCIFIQUE CENTRES DE SANTÉ;1;9;99;0;;0;;0;Z;20;0;0;0;;0 +4381;3;HN;ACTES NON NOMENCLATURE;0;0;22;0;;1;;1;Z;20;0;60;;;0 +4382;3;PHN;PHARMACIE NON REMBOURSABLE;0;0;22;0;;1;;1;Z;21;0;45;;;0 +4391;3;RCP;RESPONSABILITE CIVILE PROFESSIONNELLE;0;0;12;0;;0;;1;Z;;0;48;;fermé le 06/04/2006;0 +4392;3;RCO;RESPONSABILITE CIVILE ECHOGRAPHIE OBSTETRICALE;0;0;12;0;;0;;1;Z;20;0;48;;;0 +4393;3;RC1;RESPONSABILITE CIVILE CHIRURGIE 1;0;0;12;0;;0;;1;Z;20;0;48;;;0 +4394;3;RC2;RESPONSABILITE CIVILE CHIRURGIE 2;0;0;12;0;;0;;1;Z;20;0;48;;;0 +4395;3;RCA;RESPONSABILITE CIVILE ANESTHESIE REANIMATION;0;0;12;0;;0;;1;Z;20;0;48;;;0 +4396;3;PRS;PRIME RESPONSABILITE SPECIALISTE;0;0;12;0;;0;;1;Z;20;0;48;;;0 +4397;3;ACR;PRIME ACCREDITATION SPECIALISTE;0;0;12;0;;0;;1;Z;20;0;0;;;0 +4411;3;;AIDE SOCIALE;1;0;11;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4412;3;;DISPENSAIRES ANTITUBERCULEUX;1;0;11;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4413;3;;DISPENSAIRES ANTIVENERIENS;1;0;11;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4414;3;;HYGIENE MENTALE;1;0;11;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4415;3;;ETABLISSEMENTS DE LUTTE CONTRE LA TUBERCULOSE;1;0;11;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4416;3;;PROTECTION MATERNELLE ET INFANTILE;1;0;11;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4417;3;;AUTRES PARTICIPATIONS FORFAITAIRES NON INDIVIDUALISEES;1;0;11;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4215;3;VP;VEHICULES 
PERSONNELS;1;0;7;0;;0;;1;Z;21;1;23;;;0 +4216;3;TRP;TRANSPORT REEDUCATION PROFESSIONNEL;1;0;7;0;;0;;0;Z;21;1;0;;;0 +4217;99;TXB;Taxi tarif B;1;9;99;0;;0;;1;Z;21;0;92;;;0 +4218;99;TXC;Taxi tarif C;1;9;99;0;;0;;1;Z;21;0;92;;;0 +4219;3;ATP;AUTRES MODES DE TRANSPORT;1;0;7;0;;0;;1;Z;21;1;23;;;0 +4220;99;TXD;Taxi tarif D;1;9;99;0;;0;;1;Z;21;0;92;;;0 +4221;3;ABG;AMBULANCE AGREEE DE GARDE;1;0;7;0;;0;;1;Z;21;1;93;;;0 +4222;3;ING;INDEMNITE DE GARDE AMBULANCIERE;1;0;7;0;;0;;1;Z;21;1;50;;;0 +4223;3;;PART ASSOCIATION TRANSPORTEUR;1;0;7;0;;0;;0;Z;;1;0;;Saisie manuelle Qualiflux;0 +4224;3;PGE;PRATIQUE DE GEO LOCALISATION PAR DISPOSITIF EMBARQUE;1;0;7;0;;0;;1;Z;20;0;50;;;0 +4225;3;TS2;FORFAIT TRANSPORT PARTAGE PAR 2 PERSONNES;1;0;7;0;;0;;0;Z;21;0;0;;;0 +4226;3;TS3;FORFAIT TRANSPORT PARTAGE PAR 3 PERSONNES;1;0;7;0;;0;;0;Z;21;0;0;;;0 +4227;99;CAQ;CONTRAT D'AMELIORATION DE LA QUALITE ET DE LA COORDINATION DES SOINS;1;9;99;0;;0;;1;Z;20;0;51;;;0 +4228;99;CAC;CONTRAT D'AMELIORATION DE LA QUALITE ET DE LA COORDINATION DES SOINS COMPLEMENT;1;9;99;0;;0;;1;Z;20;0;51;;;0 +4229;99;TXF;Taxi tarif F;1;9;99;0;;0;;1;Z;21;0;92;;;0 +4311;3;DEL;FRAIS DE DEPLACEMENT - ENQU'TE LEGALE AT;0;0;16;1;10;0;;1;Z;;0;7;;fermé le 29/02/2008;0 +4312;3;DCM;FRAIS DE DEPLACEMENT - COLLEGE 3 MEDECINS;0;0;16;1;10;0;;1;Z;20;0;7;;fermé le 01/09/2009;0 +4313;3;HCM;HONORAIRES;0;0;16;0;;0;;1;C;20;0;16;;fermé le 01/09/2009;0 +4314;3;ENQ;ENQUETE;0;0;16;0;;0;;1;Z;;0;16;;fermé le 29/02/2008;0 +4315;3;AUT;AUTOPSIE;0;0;16;0;;0;;0;Z;20;0;0;;;0 +4419;3;;AUTRES DEPENSES NON INDIVIDUALISEES;1;0;11;0;;0;;0;Z;;0;0;;Saisie manuelle Qualiflux;0 +4501;3;CNT;CONTROLES MEDICAUX (CLERCS ET EMPLOYES DE NOTAIRES);0;0;12;0;;0;;0;Z;21;0;0;;;0 +4511;7;CDC;CAPITAL DECES;0;0;14;0;;0;;0;Z;24;0;0;;;0 +4512;99;FCC;FORFAIT CDC COTISANT TI;0;9;99;0;;0;;0;Z;21;0;14;;;0 +4513;99;FRI;FORFAIT CDC RETRAITE TI;0;9;99;0;;0;;0;Z;21;0;14;;;0 +4514;99;FPR;FORFAIT CDC POLY-RETRAITE;0;9;99;0;;0;;0;Z;21;0;14;;;0 +4515;99;AOT;ALLOCATION ORPHELIN D'UN TRAVAILLEUR 
INDEPENDANT DECEDE;0;9;99;0;;0;;0;Z;21;0;14;;;0 +4611;3;FDI;FORFAIT DIVERS PAYES A LA STRUCTURE DE SOINS (FILIERES ET RESEAUX);1;0;11;0;;0;;0;Z;21;0;0;;;0 +4612;3;FET;FORFAIT D EDUCATION THERAPEUTIQUE ET D INTERESSEMENT (FILIERES ET RESEAUX);1;0;11;0;;0;;0;Z;21;0;0;;;0 +5101;3;OP1;FORFAIT VERRES UNIFOCAUX OP1;0;0;6;0;;0;;1;Z;21;0;40;;;0 +5102;3;OP2;FORFAIT VERRES UNIFOCAUX OP2;0;0;6;0;;0;;1;Z;21;0;40;;;0 +5103;3;OP3;FORFAIT VERRES UNIFOCAUX OP3;0;0;6;0;;0;;1;Z;21;0;40;;;0 +5104;3;OP4;FORFAIT VERRES UNIFOCAUX OP4;0;0;6;0;;0;;1;Z;21;0;40;;;0 +5105;3;OP5;FORFAIT VERRES UNIFOCAUX OP5;0;0;6;0;;0;;1;Z;21;0;40;;;0 +5106;3;OP6;FORFAIT VERRES UNIFOCAUX OP6;0;0;6;0;;0;;1;Z;21;0;40;;;0 +5107;3;OPM;FORFAIT MONTURE CMU;0;0;6;0;;0;;1;Z;21;0;40;;;0 +5108;99;OME;TM DU FORFAIT MONTURE MOINS DE 18 ANS CMU;0;9;99;0;;0;;1;Z;;0;40;;;0 +5109;99;OV1;TM DU FORFAIT OPTIQUE -ENFANT-N° 1 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5110;99;OV2;TM DU FORFAIT OPTIQUE -ENFANT-N° 2 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5111;99;OV3;TM DU FORFAIT OPTIQUE -ENFANT-N° 3 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5112;99;OV4;TM DU FORFAIT OPTIQUE -ENFANT-N° 4 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5113;99;OV5;TM DU FORFAIT OPTIQUE -ENFANT-N° 5 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5114;99;OV6;TM DU FORFAIT OPTIQUE -ENFANT-N° 6 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5115;99;OV7;TM DU FORFAIT OPTIQUE -ENFANT-N° 7 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5116;99;OV8;TM DU FORFAIT OPTIQUE -ENFANT-N° 8 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5117;99;OV9;TM DU FORFAIT OPTIQUE -ENFANT-N° 9 UNIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5118;99;OVA;TM DU FORFAIT OPTIQUE ENFANT -A- MULTIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5119;99;OVB;TM DU FORFAIT OPTIQUE ENFANT -B- MULTIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5120;99;OP7;TM DU FORFAIT OPTIQUE -N° 6 MULTIFOCAUX (CMU);0;9;99;0;;0;;1;Z;;0;40;;;0 +5201;3;FDA;FORFAIT PROTHESE DENTAIRE ADJOINTE;0;0;6;0;;1;;1;Z;21;0;27;;;0 
+5202;3;FDR;FORFAIT REPARATION PROTHESE ADJOINTE;0;0;6;0;;1;;1;Z;21;0;27;;;0 +5203;3;FDC;FORFAIT PROTHESE DENTAIRE CONJOINTE;0;0;6;0;;1;;1;Z;21;0;27;;;0 +2205;6;;CCAA : CENTRE DE CURE AMBULATOIRE EN ALCOOLOGIE;0;0;10;0;;0;;0;Z;;0;0;;Pas d information sur la prestation;0 +2206;6;VIH;VIH;0;0;10;0;;0;;0;Z;;0;0;;;0 +2211;5;PJ;FRAIS DE SEJOUR;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2212;5;PMS;MAJORATION PMSI;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2213;5;PJE;FRAIS DE SEJOUR IME;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2214;99;SGJ;SUPPLEMENT DEROGATOIRE SG SUR PRESTATION SEJOUR NON REMBOURSABLE (CNMSS);0;9;10;0;;1;;0;Z;20;0;0;;;0 +2215;99;DSJ;SUPPLEMENT DEROGATOIRE SG SUR PRESTATION SEJOUR REMBOURSABLE (CNMSS);0;9;10;1;29;0;;0;Z;20;0;0;;;0 +2221;5;SHO;SUPPLEMENT CHAMBRE PARTICULIERE;0;0;10;1;13;0;;0;Z;22;0;0;;;0 +2222;5;SSM;SUPPLEMENT POUR SURVEILLANCE DU MALADE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2223;5;HNN;FRAIS D HOSPITALISATION DU NOUVEAU-NE DONNANT LIEU A FACTURATION EN SUPPLEMENT DE L HOSPITALISATION DE LA MERE EN MAISON DE REPOS;0;0;10;0;;0;;0;Z;;0;0;;fermé le 30/04/2003;0 +2224;5;SCH;SUPPLEMENT POUR CHAMBRE CHAUDE;0;0;10;1;13;0;;0;Z;;0;0;;fermé le 30/04/2003;0 +2225;5;SIN;SUPPLEMENT POUR INCUBATEUR;0;0;10;1;13;0;;0;Z;;0;0;;fermé le 30/04/2003;0 +2123;5;D03;ENTRAINEMENT A L HEMODIALYSE A DOMICILE ET A L AUTODIALYSE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 29/02/2008;0 +2124;5;D04;ENTRAINEMENT A LA DPA;0;0;10;0;;0;;0;Z;;0;0;;fermé le 29/02/2008;0 +2125;5;D05;ENTRAINEMENT A LA DPCA;0;0;10;0;;0;;0;Z;;0;0;;fermé le 29/02/2008;0 +2126;5;D06;HEMODIALYSE A DOMICILE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 29/02/2008;0 +2127;5;D07;DIALYSE PERITONEALE AUTOMATISEE (DPA);0;0;10;0;;0;;0;Z;;0;0;;fermé le 29/02/2008;0 +2128;5;D08;DIALYSE PERITONEALE CONTINUE AMBULATOIRE (DPCA);0;0;10;0;;0;;0;Z;;0;0;;fermé le 29/02/2008;0 +2129;5;D09;FORFAIT D'HEMODIALYSE EN CENTRE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 01/03/2013;0 +2131;5;D10;FORFAIT D HEMODIALYSE EN CENTRE POUR ENFANT;0;0;10;0;;0;;0;Z;;0;0;;fermé le 01/03/2013;0 +2132;5;D11;FORFAIT D 
HEMODIALYSE EN UNITE DE DIALYSE MEDICALISEE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2133;5;DTP;DIALYSE TIERCE PERSONNE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2134;5;D12;FORFAIT D AUTODIALYSE SIMPLE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2135;5;D13;FORFAIT D AUTODIALYSE ASSISTEE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2136;5;D14;FORFAIT D HEMODIALYSE A DOMICILE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +2137;5;D15;FORFAIT DE DIALYSE PERITONEALE AUTOMATISEE (DPA);0;0;10;0;;0;;0;Z;22;0;0;;;0 +2138;5;D16;FORFAIT DE DIALYSE PERITONEALE CONTINUE AMBULATOIRE (DPCA);0;0;10;0;;0;;0;Z;22;0;0;;;0 +2139;5;D17;FORFAIT D ENTRAINEMENT A L HEMODIALYSE A DOMICILE ET AUTODIALYSE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 01/03/2013;0 +2140;5;D18;FORFAIT D ENTRAINEMENT A LA DIALYSE PERITONEALE AUTOMATISEE;0;0;10;0;;0;;0;Z;;0;0;;fermé le 01/03/2013;0 +2141;0;CPC;FRAIS DE CHAMBRE PARTICULIERE POUR CONVENANCE PERSONNELLE;0;0;10;0;;0;;0;Z;22;0;0;;;0 +1617;1;RCC;REDEVANCE CHEF DE CLINIQUE;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1618;1;CRC;COMPLEMENT DE REMUNERATION CHEF DE CLINIQUE;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1619;1;CSS;CONTRAT SANTE SOLIDARITE;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1621;99;CAT;CONTRAT D'AMELIORATION DE L'ORGANISATION DES SOINS - TRANSPORTS;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1622;99;CAP;CONTRAT D'AMELIORATION DE L'ORGANISATION DES SOINS - PHARMACIE/LPP;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1623;99;RCT;REVERSEMENTS CONTRAT D'AMELIORATION DE L'ORGANISATION DES SOINS TRANSPORTS;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1624;99;RCL;REVERSEMENTS CONTRAT D'AMELIORATION DE L'ORGANISATION DES SOINS - PHARMACIE / LPP;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1625;99;PCT;PENALITES CONTRAT D'AMELIORATION DE L'ORGANISATION DES SOINS - TRANSPORT;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1626;99;PCL;PENALITES CONTRAT D'AMELIORATION DE L'ORGANISATION DES SOINS - PHARMACIE / LPP;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1627;99;CIC;CONTRAT INCITATIF CHIRURGIEN-DENTISTE;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1628;99;ODG;DEMO. AIDE FORFAITAIRE GROUPE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1629;99;ODP;DEMO. 
AIDE FORFAITAIRE POLE SANTE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1630;99;DAG;DEMO. ACTIVITE GROUPE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1631;99;DOP;DEMO. ACTIVITE POLE SANTE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1632;99;STA;SST AIDE ACTIVITE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1633;99;STD;SST FRAIS DEPLACEMENT;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1634;99;CDG;CONTESTATION POUR AIDE FORFAITAIRE POUR LES ADHERENTS A L'OPTION DEMOGRAPHIE DANS UN GROUPE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1635;99;CDP;CONTESTATION POUR AIDE FORFAITAIRE POUR LES ADHERENTS A L'OPTION DEMOGRAPHIE DANS UN POLE DE SANTE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1636;99;CAG;CONTESTATION POUR AIDE A L'ACTIVITE POUR LES ADHERENTS A L'OPTION DEMOGRAPHIE DANS UN GROUPE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1637;99;CAS;CONTESTATION POUR AIDE A L'ACTIVITE POUR LES ADHERENTS A L'OPTION DEMOGRAPHIE DANS UN POLE DE SANTE;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1638;99;CTA;CONTESTATION POUR AIDE A L'ACTIVITE POUR LES ADHERENTS A L'OPTION SST;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1639;99;CTD;CONTESTATION POUR FRAIS DE DEPLACEMENT POUR LES ADHERENTS A L'OPTION SST;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1640;99;CIM;FORFAIT AIDE A L'INSTALLATION DU MEDECIN - CAIM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1641;99;MAI;MAJORATION AIDE À L'INSTALLATION DU MEDECIN - CAIM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1642;99;EHP;MAJORATION POUR EXERCICE PARTIEL EN HÔPITAL DE PROXIMITÉ - CAIM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1643;99;PAA;PAIEMENT AIDE ACTIVITE - CSTM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1644;99;MAM;MAJO REMUNERATION ARS - CSTM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1645;99;DEP;PAIEMENT PRISE EN CHARGE FRAIS DEPLACEMENT - CSTM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1646;99;AIF;AIDE FORFAITAIRE - COSCOM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1647;99;RMS;REMUNERATION COMPLEMENTAIRE ACCUEIL DE STAGIAIRE - COSCOM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1648;99;RHP;REMUNERATION COMPLEMENTAIRE EXERCICE EN HOPITAL - COSCOM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1649;99;MAO;PAIEMENT MAJORATION DE LA REMUNERATION ARS - COSCOM;1;9;99;0;;0;;1;Z;21;0;47;;;0 +1431;1;D/OCC;ACTES EN 
D (ET OCC POUR LA CRPCEN);1;0;2;0;;1;1;1;Z;;0;2;;fermé le 01/12/2014;0 +1432;1;DC;ACTES EN DC;1;0;2;0;;1;1;1;Z;20;0;37;;;0 +1433;1;SC/SCA;ACTES EN SC (ET SCA POUR LA CRPCEN);1;0;2;0;;1;1;1;Z;;0;34;;SC: fermé le 01/12/2014;0 +1434;99;BDC;PREVENTION BUCCO-DENTAIRE: CONSULTATION - MATER;1;9;99;0;;1;;1;Z;20;0;20;;;0 +1435;99;BR2;PREVENTION BUCCO-DENTAIRE: RADIO DEUX CLICHES-MATER;1;9;99;0;;1;;1;Z;20;0;20;;;0 +1436;99;BR4;PREVENTION BUCCO-DENTAIRE: RADIO QUATRE CLICHES-MATER;1;9;99;0;;1;;1;Z;20;0;20;;;0 +1437;99;MCD;Majoration spécifique PDS Clinique Dentiste;1;9;99;1;23;0;;1;Z;20;0;2;;;0 +1451;99;SDE;SOINS DENTAIRES;1;9;99;0;;1;;1;T;20;1;41;1;;0 +1452;99;PAR;PROTHESE AMOVIBLE DEFINITIVE RESINE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1453;99;AXI;PROPHYLAXIE BUCCO DENTAIRE CCAM;1;9;99;0;;1;;1;T;20;1;41;1;;0 +1461;1;ADP;ACTES DIVERS PROTHESE DENTAIRE CCAM;1;0;2;0;;0;;0;Z;;1;0;;;0 +1462;99;PFM;PROTHESE FIXE METALLIQUE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1463;1;PFE;PROTHESE DENTAIRE FIXE ESTHETIQUE CCAM;1;0;2;0;;0;;0;Z;;1;0;;;0 +1464;1;PDA;PROTHESE DENTAIRE AMOVIBLE CCAM;1;0;2;0;;0;;0;Z;;1;0;;;0 +1465;99;IMP;IMPLANTOLOGIE - CCAM;1;9;99;0;;1;;1;T;20;1;25;1;;0 +1466;1;TOR;TRAITEMENT ORTHOPEDIE DENTO FACIALE CCAM;1;0;2;0;;0;;0;Z;;1;0;;;0 +1470;99;END;ENDODONTIE;1;9;99;0;;1;;1;T;20;1;41;1;;0 +1471;99;INO;INLAY-ONLAY;1;9;99;0;;1;;1;T;20;1;41;1;;0 +1472;99;TDS;PARODONTOLOGIE;1;9;99;0;;1;;1;T;20;1;25;1;;0 +1473;99;ICO;INLAY-CORE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1474;99;PAM;PROTHESE AMOVIBLE DEFINITIVE METALLIQUE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1475;99;PDT;PROTHESE DENTAIRE PROVISOIRE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1476;99;PFC;PROTHESE FIXE CERAMIQUE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1477;99;RPN;REPARATION SUR PROTHESE;1;9;99;0;;1;;1;T;20;1;42;;;0 +1511;1;A;FORFAIT D ACCOUCHEMENT SIMPLE DES SAGES-FEMMES (FORFAIT N91);1;0;1;0;;0;;1;Z;;0;1;;fermé le 21/11/2004;0 +1512;1;AM;FORFAIT D ACCOUCHEMENT MULTIPLE DES SAGES-FEMMES (FORFAIT N92);1;0;1;0;;0;;1;Z;;0;1;;;0 +1521;1;MG;MAJORATION POUR 
GARDE;1;0;1;1;7;0;;1;Z;20;0;1;;;0 +1522;1;MA;MAJORATION ASTREINTE;1;0;1;1;7;0;;1;Z;20;0;1;;;0 +1523;99;PRC;Permanence Rémunération demi-journée Chirurgien-dentiste;1;9;99;0;;0;;1;Z;20;0;47;;;0 +1601;1;CPU;CONTRAT PRATIQUE VERSEMENT UNIQUEMENT;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1602;1;CBP;FORFAIT CONTRAT DE BONNES PRATIQUES;1;2;;0;;0;;1;Z;20;0;47;;;0 +1603;1;CP1;FORFAIT CONTRAT DE BONNES PRATIQUES CP1;1;0;7;0;;0;;1;Z;20;0;50;;fermé le 31/12/2007;0 +1604;1;CP2;FORFAIT CONTRAT DE BONNES PRATIQUES CP2;1;0;7;0;;0;;1;Z;20;0;50;;fermé le 31/12/2008;0 +1605;1;CP3;FORFAIT CONTRAT DE BONNES PRATIQUES CP3;1;0;7;0;;0;;1;Z;20;0;50;;;0 +1606;1;CPL;FORFAIT CONTRAT DE BONNES PRATIQUES;1;0;4;0;;0;;1;Z;20;0;47;;;0 +1607;99;CSI;FORFAIT CONTRAT DE SANTE PUBLIQUE INFIRMIER;1;9;99;0;;0;;1;Z;20;0;46;;fermé le01/01/2010;0 +1608;1;CSL;FORFAIT CONTRAT DE SANTE PUBLIQUE BIOLOGISTES;1;0;4;0;;0;;1;Z;20;0;46;;;0 +1609;1;CBR;CONTRAT DE BONNES PRATIQUES ZONE RURAL;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1610;99;CAD;RÉMUNÉRATION CONTRAT D'ACCÈS AUX SOINS DENTAIRES;1;9;99;0;;0;;0;Z;20;0;0;;;0 +1611;1;CBM;CONTRAT DE BONNES PRATIQUES ZONE MONTAGNE;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1612;1;CBU;CONTRAT DE BONNES PRATIQUES ZONE URBAINE;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1613;1;ZFU;CONTRAT DE BONNES PRATIQUES ZONE FRANCHE URBAINE;1;0;1;0;;0;;1;Z;20;0;47;;;0 +1614;1;CP6;CONTRAT DE BONNES PRATIQUES TRANSPORTEURS 2006;1;0;1;0;;0;;1;Z;20;0;50;;;0 +1615;1;CP7;CONTRAT DE BONNES PRATIQUES TRANSPORTEURS 2007;1;0;1;0;;0;;1;Z;20;0;50;;;0 +1616;1;CP8;CONTRAT DE BONNES PRATIQUES TRANSPORTEURS 2008;1;0;1;0;;0;;1;Z;20;0;50;;;0 diff --git a/src/test/resources/value_tables/IR_NAT_V.parquet b/src/test/resources/value_tables/IR_NAT_V.parquet new file mode 100644 index 0000000000000000000000000000000000000000..03d41591c5300f610a4cd81fe3282c21f18433ed GIT binary patch literal 42011 zcmb@u3w#vS**|_J%*af3_hfgnJ4u!#%xaL_7kEC1kkt$fFoniXorULSnuC6SEoUWuOXX2=RC0@X zWN?d(>_$ryX+mwy0#5>#Bnb$sS^qb{vifkck@XF#vviQ}QnOHrGJAf2HY3fT#or}1 
zP$%5+J*c;=N1u@1V6l=0n^ky-4FnMkBKRa;`w*!U9wLptYmN}x==ZKbH{_$_+rc*Z zh4)C4Z-+sMcoA@P7?@2$S)a@{1-uXa5Rhie#vh^eNH^#L(j060nymQ!98ZJ?|MPtdDIa&A!E; zPYq(5>$DvJFY2%JzQ;D#YjPI&V4#6}GK-tPk8Cht*-sh=o7md<@OIyRwyEB` z8gp5Q87nkq79DD;_f+!DM^G>l{tFG3WUP`j8PxX>+uY>E8@~^D8U;zvlJu=d;l=+O z@lB2Xq1AY*QJc>;HCo;yjcOs&n%NcYKy zX=yEKHb|&s{eHn{VnY)*A3uHTjTO1$2jK8hC4miA67bDGLTsSE@0uThQDS6l{-0pn zSm<7qI}G5{K!b3Qt@8_V7D%0QHBajH*8T`V8tiTQHaQpLfgggsNfi7R;T7ZiN!|7x zcwoNq)Hi{x3xNCTwxhry$h)J&*VWak8&Ucpfvv0aRgyYyhY>&_trmeS^sUCrW*#97 z=O8-yUBJQ#)cpwf{lj36?*s84>Ne7A|F3`tt}*Cn`R}H9$=CT_9)-MoF*HLR1BK%` zkM-9s-9&2p4k2(9PT%d_4i0|CAmfScKLXo5#J}CM)I$7AEhsYBU<7`goO=aiIu4UF zW<*N9PtG$1-tVvT%1eF7{1)LIlE?H-LU z=Vjwr++ZQ~OD}L9pb#9x7(YY~Vfyxd`Ts(AOxl#o`hy!jB03GhlfVX8dRc-v3Jl+IG{LzSa{WiZ zD7qE7&lp9Xu_9_{h=BLW{|WjkQP^QS0^2D<+bN!qIQ}pq;Xxy{!6?*VL!xH1DhuN; zBa{W_0W^7ZCZbvJeu~gHfdsbK{s0;Me9s_=*rm{V!fMuk820YKaC}X%jR?bVf+3Rq zHNpltxANZs=Tb9T31##>Sb^^NVQ?9M+!aQVY}#lc(nbr9rhPATXdelDlbo?!l*z&j zBecmF8Py2={|(vdzXr52Hs86~X!j4nMkC|@9Wt0}zh*GY=n7PDrg#JzBy`#^Glnr{ zhe5hSK=5n)$c*s#<|D9ykh%@z&>e_>9efDSKV)R1@;b?46#p># zXftUZz5szN^kE%fEWUhClhg05%)ar#x`H z1K`Dehmn_pLO`LGt~Yj+BsUO$bW!jW_!i!%F`F(mpjxe-B_9?g2Z( zGWm?8)){|Ai+JlGfA12n+N?TNaG?YW^@DN&Pb4h_A>!j7UA4AY(x|IeQk2 zEIIqwH*nPY-Q+!1l&-V@X5hJ{9fv8lkJQEXv2_i@sNpaehHt|+0VR!0SoJ>!sC2P# z1g@b7{Sa(4ihMHzO|-x^H^{a^+lc7*08i}0V~1daf16Nv_Qv7$NJdW%bs##to~`#| z+gtDV&L{QG3j{#&Fc@hw5nXRP0v$$Zg@6oO_wfE~7|?AL|3AP?o~;iIuKo_-S>C9Y zw;^L#23QXMH>Iw@cLDhx4Pu%aeiV*8F>VZh7QB@RWg#1t_82)w(3}k)#SM!|!{{C( zaeuagCIrX71IApBe;YP*lLmvNrfgM}S$xRA4IN0W&Bn8cJ!AtYI7-;Mpm#mMx86z2 zq<-mqw!RMAcms^sHwlSQ*L8hQ9!WGx8Rfm0hQmd(ks~R!NUby?TS@)tmD~4``tAFS z=n9NU#vOQU9Za&Tjt(IeCMhNPZR!p4frVYCA%(F0(X#5RvhNNWRAYBd`&TaAx6f5hL22 zmj$#F#!=m960d#Fc-(kX#)(G5`Sx9kVR%6e&dmjEgJ0?v4r4aqNb2AoBo6LDv@-7q zY%U-TOKXwdHxnQ{L>ji=fxOy0#Gtwx{9f=1qThn87DNrG*lH0S#U!vDu7>S1zfHa; zF$i0~OE5S33&0kS&r00opv6g`>GYly>+r==8%C94-6&m5C|X{hg<&f~!{qxQRl$r+> ztc^N~9Y~sGHT)8F3|Cm6gFtgLV8^7p|{#Z`;P?Tu|lScE5YuO;y{rRiWTb+tyq8V9tao2#YxtCho4C3^UE`uFjdT 
zbEkV}g~`N;d)!m+a=WLx-FG3$?cR9-W8$`L+lHzyTrctyFI?|lzkdCNl?E2_rky)? zq8#I8*H<#`6V|V<#0w@EZ@1?-`3X-vv3`A!x44}Gy%w&zUT%sj?DT)b^tVXkn~HaGJ)ztcoGW}0lz!7#dS zvgVp@Vk(MnLch?w%}hn%U4+<)ki%3)-8pEvo5(0vPHb-c5;s#se3(YUl-%Za=a47X zS7NZ%m+}+-fNlnIxSi;-d+N@+tT_|2eL`?2-iS%PW_pI+(b4R5+(|iO=}FD zmQ*qZBZrifj}v%Sp2`%mS|{h@Ls}<8(~2Ia$c`sczM7O8_tnI-xK9v7GL^A_R5RM| zv+xL(@G+&6t3hbG5n`&)ndt;6d`wG!jc4$R9%D`;n&nzHr81OHCUj;ZQG{K*hKxC~ zmP${p0r;3ne55@oOe87J$fvB5VlL%FnK-u)7{eP=*rY3pVC5Ah#+}1!;W*P>tVGnQ zH2|)gO{!c7wI;c@cr6uYR*^VYj9R$g@=8i!8j%=dCXtw>(ysXMN`-qJNlBa~XsLE) z6EMVF#cN4Lki5TSwWQ}#yh35hS*42`F<>npk0-c0vpAcN$5hVBtHAq!ai*bGRpa0k zlGJKaNsY<1M&dQ82xqsZlQpR{r?aUpZZTd@NysyYPpMsu!ln|QRu(OBun~>>nE^&f zH5t%ElEQIpEW{P_sYDxh92-ds6S+QC3t5iiR`AJMKT`}0V94q~^`Hh&5Y&Vz_t#>| zn~2{aOB35Xkr}t@;TkQ;-NvWW9ZWUp5WpA_?&@M)4H=TFi(AjnQZkI2)#IMuv#QGd z1{2ZBCv}}k+Pgxbnl6QVlTU{uOf}mU=7#vTaGmQe689}9aqn_IuErUOsJ=}Gv1&|f zVHKTQNOa3YHW6iJvwA{(62PrX>WQvAyfHdQsfo^M19gHtVZJLdhp8bUA;?A)p@!(5 zb9r44GsUc~bNkp(i1V|$+Iuq_*0>$u1#~l|3dJPEHSr-;=bA`DxW;Q`m6)f1RU)2S z*p4Vyg^v31gnB&Nk;M3QC|nsD=2=3L7H}H(a|{B;EXf^U+d|v|Hk9`4WJ7aNw}w>% zCc-EkeiK4^29Qpn8pU`><32hZXh5AQMU~i;exy`#j@Hc1X=L`Z2+d^YG&7Wqq||0U zlB!coMCV@@Wr zkxtJ-RMBOmfHO2a7SV)DiKgx{cxNt&cz3fAom)Xdebb0y=|fXvME72g+0>ZZNtnBs z#Jmq0>_yqGc41=QFy6pD#VfH{E@A}d8W3eeS(Z$3_n^iHjie6&o-Y9E8a9~_%1AUzKc zVbf`(r^DPlk{SL9o2eJ}5B{4~lAi4b90VBCGI$Li3Uw>q+Oh^?bYKs!bj{&h zSfR&w-sNMlFk=~ffW&+}32)g?VjI#VtoE`oRd5iM`xgRSBRebP`h?74+Q=-=Y%**3 zH|(rP@7stZ$t>@EWY%B@etk&fy1<>>3#=0A%_9ojxolWvjwf>s;?5$;K|3D~>GV&0 zdTy*n32Rm`_uQBqR+E~ZR@zm@&NOta-zj7!#Rfhy&SMxM!Z(~Qh?On z@+NwG5$VLCO{9}M(O!p&S@tn$Qk=e?*Mk6m`!!a}hs_HUBk!F>a5z0mZ z&`PfWq%(4{Mi!9d@EBSxoIRXDWfz(OwI)8947u(!N0Lg7cM;V3z-)UeQlrOp`Wj_Z zNsE(2Mt=gW#4YTC(buuXcywbfcMyGz;K_Nh$7i@Stagy z4-0t_B*Y^ok{)!iY2EV)X@9>Q0-(Kv`x26=25NhY!p7sChNFP=aS|Tpz|ePro?*lO z-W)a@$O3d(IB*m^tFWMc4js=u$m%H!yDpssJ5c*I(MK;g2-W~f8-QW`8yjoq-o&E( zD_B%`#8COK5M_Kd`t%khO5Y_Y==mqFt3jm<1*%9G&WiBxC<$wjCBwsPIN5t1nq&i= zhRi#l-#BRRuVDy_>Yr94y48RcN1%5cp)7cO2ze&pRVVN%B{}piAJJ81MsJY&3&x(a 
z2%qqgNK9Qxbf%mcCHnSPN&A-D*;Kn~=*-@EhA76g0MNYWVDA~$U|M|HSaH+A9Z)M! zKu;UQ#%sCf40w{nhwu5nk??m+sz`E)?z?`tmQ5ywaS~y^AaOCxlbGisEXpS#3>Crh zGG^R8a3(su%M5!j1+zZ1rLs>VUD65*;5C!;tUO4SKSPf~^!F|NFLhxdd`)aj-~AsF z7Ur>`l;=XEPGn=5-gZMj{SwdJ1hcDmp<#d)gNb|32MqTNpGDH+7h{^IQ9S8LO<_sL zAgnlpzjcYtBOTHS7KBnZqQiv)GjIW5%d42N5+gQ1!qPI#pETb_+WMX*ZPFhpWP4tg=e99mO&t3mXaMR0rUz0@hn&tih941_yHEo=WKgYeHP#u0-%2#VZ#cQj>$!D zR+CxMnmh~`L9)7vbPNnY-}u>Nm|KpN-Zw~x_ZSqu$!5g%upRBtsyD9qe zsTHzxA~q#rE~+wga8k7rMc8LVpqqaNe(Sv$V_86=;{w}}@j&kj9}>lz__||)v9kZc zwnsg6B(7Zo$(88I?fVxS?($^VL`*%OB&4k(;2bNPNODiHi7xR&YL0dLYn=a~g|TiJ zW*v>L5BXSkBV+1cj=pmvP-bli9KhP>7-t({ulQAv=t@#f& z65IVLuckD*MQlQ4HA+0K$?Yi(onbCu^3AD4%0_>dZ!5JYm1ricN?|i;AAijtykOu2 z5*aRn7U520BdS`2MKZ`E-LnI4uuSy)iA{H?)fnv;^IKs6wE|K7NszM}d1i!R z;B{_ZWj2YGM7nUL^3r>hPxQMUe9GvN5{bs7`&BDqC`2CpSLCO#=`6G?LRPEP{OIFJUYR~TTl3ehfAorlp(V47Yjz-SIY&V~D8 zB<(4{qo>&L=3kn!U@$`&Nje3abDC}YCnT!o6Mg&?tjT^DX40uxn3~HlO&f+PpugD&o~0=8K2h87#(c7Y_v|Aaiqsko;CPt=1)p>&3C zh9tVtLPFk?oSP(~2}4BFYA`MHMZoW%{$d9tznLhbmx9a7prB%q_oFbvqVvC$H6xQOwNN<&x%&;+`4C_nqCzWrQ}6-ih`UJn*pj~S*uCO6e*EW5`r zEGc8+LkHf5-FiCu@-IyA?)Qy}FUNbQ7aE~2G5x)*L>pd&{Y~#&$dx*=~A#`PrSg8i$NfhL@tNBbdwas9|WV1$&j}k>o%ZDt{oKjf8uDhOX2W zEbE+*x;p2chdl*dPfb`?HX+3vLn7lI6u^E8-Q>Hpj@=Q@;~3Wcd9Y(VEv()SQzI;0 zW?}V?-4LhPfpth4W%ZEy9}Ga;n7(-KcYqDo_CEHVw6CMyy$EEC2aZCn73zGT03gH& z0^rFmjO3Lhr2Q6)Pz|AXv(YR;l7n;*|EeXSV z)umZxaMk-*VX!8Tkd(p$M#NGjy=x({@%M=8{3q|+Wh2VKWoXs20;_21FY)^Ikcd@q zQ6=bNY7W;VLSkJW35%CgvmSz3UY#0)6+ieYUL=AAohS06Q@{*&W9)E0D|yJTl@e`m zAFQ26VFh9#wo+KTz}-7xqi z8*1x)o`gm*!$(N^cdj9tVo3Q1dS6xua0n=f=?b-p2+obbx(p4!Q65S*8t%MMvI*Yj z8{g&}_wUGwIzF};9_U|uoO41xG9!spMA6bRHW3|(a01d)Z73<&ft>}UovzBEM7%Sh z#nY->8H#1(jzmhAlS*7uQ*v09b!6$@S`tqZc6Mf2Jt5|jrPj2T)TI)uUCD&CgwJ+S ztzfhQc5XbR%D!p~f%|U{uXW0{64aVXhSa#%#rV4uv1Am^v3OcWUTZR{L=y3^EIT}w z>C@p`j44r7m#50XSVGfV3kdJ4lbHfugr{{$Q8lS6k;+qQIHRRe%9q0JjA_IUA7dh= zwoW4sNti||gk>|6N(z09M%8JuoK#b3)w9yBL=`QiT28yHiU?_97t%#bSUCu1z!avi zDISKa4wcGjHI_`IxSo#i6fLeK(^?w(MOM-^v808WTHGmPoyV25tAS99jZf%li%1-T 
z*rCSdkfO`zi4uks*Anp(p~x86b_`%9>D$CDvE$L05@1}8^@Rwb>ml#R?2Dp0ibbPJ zTOy^T)eMF{txY~LctU$Bq3bICm(xn7TgIqqab(Gh%82(qGeV^$<+i(m5mMbpf^HIk z*`amLDX&c?GATKfN#VH;Rb9ludDPi_@d7t`&8m8*n=pUn!j(~MUR4Z^5(~LT*|@Hh zYPU1n+sX-_jAly8NI_sZ(~X|TQCkXwkb)j1M2SWdA;ooHp&U_TF;(ta+kXskGEJn@ zbwx3H8bc3KDz+vWNZNVU0E!3BM>@8?Ya(1hl}hMziA z&J+0egrR3`ZMIV@Wi1|o4;bw3WqN(jn|BS$vDn($a3a$d#oUL|T9?KYaor>oJd1&h zAKJhx?eV_9@DVM}yj0wUfyDcDt=G~v*iZDn2gq@6wUebvOL>kt4pxmhc`S*gzGdZD z6qa%_HTW~ab!_jfl;t{EjYnYK=nj3%dgAqVwHu?M^~P)sMfJ9Yq&&Egl+;_5R7y>c zZ0>oJ)&1915?)r*MT-gz-$N>_kC`$XM&4rV+;C9=v%V>*#&vKX`qZUFweUz$vC@SX zz&9}vDr{49s1}l{Bv&>Qz2!{~JD;uq@#L$=Y{`W7e6vRnMSmSIl#PLk3&G_;H;{m+-cN zav72fCUjJbSNljkmKao19F?c15mItplN?bpU4KAp?ZxYAi0x_8CLuAD7=sH=Zpq0Rxw+^%Ggf6v z3D~1iO;=U%HX)o+LLCZZXj@ckSJDZ{l95h}WQ{1HOq4mhFe0ZDX(h_UBa?`8tx)U0 z?4(l|fC%J|;xLDlq(*yqQu$#1m~1H`(f;>XElhu!TOQ8HDA=WF(VlyX?&$1f?mVGd zBZ|^2O~V|7W~pg82IVIa!L_$UHb+v5)_Ws&yBTsS-9Nz?OkoFKtiB!9S8R!VMn&>P+XtIVOho{hipS z&Li>d`P9m{-Jk1uaky?&BWl01A&1pv=6Ciu7)wo!l|FM7*JKSD-Rs#_4Dpv%+Y_mt zcSyLuo`=8~I4Ep4N5Dzq9&9b?n%o&vgh(|!M9snXN!wyZsI%M&=`gfzV6`<9mzDU? 
z+-pCyL*rmE(GE+=%0s?F%tA*h5vS$+mQ7I98a>leiK22i5z|6il)H2WUli_vrAb?P z@C-DA^I8FdHW5*E?_*|YfMWw2bFe$B3D%j(94<92rRlO92D{GD)sl1P$Rtb`n=96u z=mXTx5q*$fVloJ-IYjh)*S~JgdY5uB*7x?0sK+ zk*fbmClk>*;DHR}C)iD&3*PBS#HA_N($d#+;c||rdn|TnvX~NQep(1E6;+cR-VcU8 zVJ45YxbpZqImApfN6~cG3D!zQPp8EBCe~{jDkzUCaWF`GMvK$;D3@neyR>-E2lv&r z^-Qdkm8h%Niy;^93p6JJj1lR9;m*S^#0N+>+w*8iILR!I<{Tyxq^-Q zhGw@u!LqWC9z)GtsD$~My)6?8C456+qKue%^q#x89O5S~s}N$D(K>TdpA!;So7jx1 z*jg6nI!b$1x|Z^KBEz%|y(1LUt1abNUswt0R3=3KZ6@m2?8+`B6oSsIjA>D?&8mCl z8W{^VyN{&%tkS7g`WUq~_ae>878*}4j#K_X=6O^q?(hA;Nwjl#Hc+EWc!O#Zy4?3NLeU8ZtHoD#D^{vS@2%}f(Oq#>1tEyLX&IzNi`(cYa(@k zz#J094r0H1uc@VnR6SyHHj8ezd0iE;-J>{4?kPzRJc~oUqVpXmkyCsAP{HJr@R_eW zKg=zioA3H7iT9pTtiyzhLK|>T(sONWVh&fz!!p2kFVkc(#TP>SCgO=$Z?YUlOjw1v z;!UqvPR<-1+)U&Ro7bK}SZ{buH=%Pysn@=8C!!}`)j`O#$Qu)_s_YNS)MZ{%%CO$~ zYZekdmWcPy|KKOg+CLK>T7de6lHWf37(;s2{251s{cG81^y58*CM@nk-WpZrsA=&; zso{~-y#xdQk>xzzI4&Go0!zL?kk-PFKh{$S6DtAc9jY%*rj(SX#KYAh6Mr#>_0H&d zXXsA)cCJ)y>Twc1X1(wpG^CzU+@H~rsY5TVYa+YvB^vVx&d*9e2L$d8G2tcA9H0 z2UBOTNaap-L~}l4vbn~-s{XUltgRBeYjEToI>AFTHlA*Fuu(XHO!V&pld!X(YHOk8v7X7QDx*bI8mgwq`b0$bAsA0O6q z_hTYm&J#s^T_A0i2Lp!~R#xcMMLdivT0*tESUE{=$`dP1<|#^>E|#0b19mH9Lq?}p z3(VC-fB8n?QV0t@&#};o-GkCji|B!y)wm;{#|DM^i+DLarRAO+JB(@YAU#$T-*DpC zUY8@{Cc%}tyct(2A{P;68;^xZCu1u*rDbg{)NCPned$D)mZ|KyVi)I~0iDkTwRdD!tlF+UN|MI8l_IFncdBW6E`UhQPOJ(UBJj{Kl-B zc5OWHALyh6{8)v&dSS~VO8=Zoue7;h(h@j0#FK5jjHX;tXbnU4PTH-)Cst;ki|Yo7 zc=cpL9O72WMiqJ)JPcj1z7yPJoZ_h|u`|E)Zr&~{u|$_9z9>3Qr^gq8Z{YEd*qOVe zoyA4&*8P^!my7wBnpS>ARldkKvbiE`6KkQ)RB4T!zJygAqfbaMeVjh1S789r-*D26zMnT7VnGb&wENFwx!VylMR4_)3Z zRv&f~npkFXoj1d5SHhi%c*rx#jFDQ{`I8$2rc&7sZJ@=HwAo=+LUJwrsb$LqZf=p! 
zFv&FE)1+4EES*&oefAjT(*{lXlbCI`+GdcGrWsP>=Vfu)V2-v0@w@LQRM1iTQLH55C8npDHR^qMRR?M0hl)|6cend)sGR zAJ_+R5o^mtrAr+>^Zjjsa^hV+eY%{;$TMVrK<>c)RO{+#z@iI{{31|I*s1aiPsz#s zH`z}1JZ3b+`ERRhBClq2vA}673#5rz>4L?@5>1^#3R2{y?>HSX`wGq2L8!FW|3TEdYMF`H%3LL*J*X5`N(-d8$kLBGzgSa=Fdj7!b z3O&OF2W>~L^Sq+|ZS+w)^QIL85(~9#X2>j;DsJGJy<}GZ`S6a0iFROz&;)}P%BoNX z>!}GJEyHz%jF>0(h_ee|3&l2F$z)t>X3uH;jKg6&yp!0urRm+Sij4*0O1T_Hxt4-S ztEqv9g)O(J6|~GmpA+ajxDX469&{97L{7vF1&K%X;_KzD#n=mRPEnwID6KMY+Gd%3 z$wYYQRT}ZwVT-q^>7M7IyIXkMo+`7WG*rmS9Yec#?3sop`YEf$Pxl0VN$8i=zqKKsaI&v4k%)C!O%Q{~;g1-S;v&Z&LLY#IUBYBc+=K+wA`BtlUL|R$^)> zw!)@tOEHbkEA5-E6+gC+luKZvQ>1I{Or`AuJ3I&m*aYgnpH`q!C>eJv<4~2c+O~f~(l2CpcEJ z3^nCfpF*tgOv%a87X;`sEx!9X%kzWBvzhqF$3&%%OTKBWo)*tD1;!@GkP1mzNrWAv z=R-~pPPG+@bIr~ZbFFeaZmrRKob(*+ji_@3J}~-fkuy`hKCU z$2vQMomw9+K0cWZsYUbX1G#~DWSNijqy{t6LB9VIYPX2LE#fSWyHit1ppQ7F()TMI z7OU*9t@Y3sOhqE@+tdebgby}&m_N+^6TL~`6R}vYjN@_7CT#KJ85o9RxxDOepmz&W zGmC+BoKERH(+sD+Xyru5L~m+Fb=5kouYe!VEonr*!m7S*<`z?dZEer zoXHUG)4G}omBl5^L^va0g`d~9YR&B=ZZlhMhxixH3?3xT0rR?pgjAy>nJ&KKIx$h4 zXzPS!u0{O&=v2eVrHh{db=`zI=C z_ktpEHcz5-c`?MKw)Z-c5*y0U7kEyvAH7=);5IgeUf~)`-=UVewiHj2^pAZ1Q*aeP zbuOU@f}Npc_R9bY;XUZL+nL!6zOpE(=77@P||* zVK&oh8~s3JBMG{4GEBHwBGo1~dgw|!7Hy0knnWMOO&@2oi=HluYs`WSzTvT#+ed`hppYS-{-yM`AoUv|csr>*K))wg_^xR4KEzbE&uUeUX9 zRo@xCOz{aTdzURg^Q_*}PU~ITyKI$w%9Q0RR`s2JW-qsd5xj$)Q+SQruik7sW?)iYu&7-)1OWfDoKs4Vr=cw zS-5?etUBl*a%(Gbwa{OCybar^29t{aG50TJdQx6-YS$pF-d%$|TkX&@ zv0a0^PbIRb6%75OAbte*Kpvk_Taccu~e*@1V{IrL^>^AF`4NO%w<|n zA=3SMp8wF#a+%s0Uz)@47t=dy{e~rI z2ql=;*-VNKl=HGrmQLaAa8+c~C{qr3uhBnO^6^lbv1tsqxDc-TWCn-XToD|Y^zSka z=ZhZSvMK@_%6|;=L0|`QuCdU+=Wbt29Bm&EadIi?7Kid?{36g!Y~?gm7APY0H${$8 z%L3&uCus!Ql+<{160w z?kCDaF7mR|8eBsq=aeiL=5Y@TFS~!mLoSx|(>>CaF=eDWpB=dt zXGTJ~Gd-15o+%M;&rC>=+&a3|Wtl>YZ1g?8zb9~r_3dG{iwAk}34TKaTEd0Pm{IXu zbIHEmcvYUwb*wW}PG2&Mk-Q}}#B$eVQ%S=iJ_DoiLi)F2v^qM~jzbC=H&xA!o%E-) zWcA$7u`#OLvY&3B)bbYVX|@F03K>$q8HpLbWSV2LGMSwd zr*BE(^|Yi++8Dr%+Qh)Cfn#%cB^(yxHPShhZk(#TgvXUhNbB(Y9;5J8F*n5(4cto9 
z{N7ULQg^9Cd9&na^m6z$(v+Pd{yul)VUnu;w~6T^`dDAFo=A3Rid-o@U=fvS!z1UV z-PVAgU>)Lcq01$UkL1sV?zGqgjU-k!pvR)l>vEZQmrf^o&vaT<*Q2zZHDTJ(kvT~AgBO@*PkXZFZ6c+`zR?Oku`L<#>gxV6= zyDn{@J$cJwgvbqVjJWT7SE=73+m5C7JS~@%WzS+97|wG#levesaCZl$66sn?PYuzF z`VL*hRm1l{)6KNdJaSF2CWnNSPU*`s(nW79W80#;uVFlaYbWsOM2413?%gbkBBn+z zYMVkfjr9DUk*hxu%AuTbV_3<^{*|SK1e7bC&C_fvQych^F-zs%KULsv$4pBxj_1wB zo$D4HvFY^PNkLC`k^RJC=ixD-?>c&Z<+-19_f1?kYN1Re zaa6w{RWGb2`BlymTNDR0{w{n+6Hwl9EG&nvZI8n=#?jF{*!{DrZYf~S#I4*G`gk6_ z*~whC`zbozM$f3l1U^uRds2INMrZC3uPBs;C)1D^i_i}9jF<1Mr!TmNZWyX?Jz^^~ zr*Put>@8x|Y4rMH=hA8ZW~Q&`Ei=6Jv7s%(Ma5GRZLpT%xc$t+Oe$(1Q>vaTVVorm zCA~N)o53WRw*~Bk6YZLKmaVgZRP;IMa9+vyfv9mqls-B`Xm|e2;oM#9de2?VyN56q zGQHC0s%K7P4tZ?cg@N;Y#2d?BQbsDy;cY(g`QY61i%5;{4x;Z4(Alk9mceJ}npaX$ zRcfXS&BBA$DH!thp6PbEa`Ez&XH-3d+3gctT>F4>DpiLF( zde&9KEvDjxFsWyv;&P|CEv1D;S52v}+_eiXF&?MQ)Ds}xu01^V|E@D481k4nzkK9| zsz=2?KY$-v( zkuV9B?kk@Wk>-}U7CB0*KQ^BJ2o9FNl_TeU#FS?o%Yt_js^#Hu1`K06gZAZ$zb>Q4 z=L#>-%f-^C%S)vyX=A?U)}GhtJyz)&8=Z|ae_QWU>S9qUX2S{bg(8B((#}TWX=2p` zec4Jf;^P5d6TKz(_I2b+5nM3lAM2wpPQCqd(itMcjchv6`Yb~~E*Xyw{IL*6NB)Ld zPlJcPYbMDxbMyVPm9yj1NY!|*ZK@%*6rb2FNUwQCc?P&q9vZ=hHAX{b_n2_r4S`fH zZer;U|H$#h$#g6wLu)e??q4^k)s`aMF%#`G$+QZu2~cHH_M1atT^>3#vRk}97iR}s z{$a(jN?YcDm2vky8^k?+Y)&6m=1wMjS_zF2dWuNGLqA6sR^$d25E!0X8Yd*Sbq!V2 zWwNak7Zt2@LqeNLbm$CbvHKwzr_J2s^dQAyv~dbL^pmIdEG?|qm&b*AGESdqXvzW~ zasIW{GPQr%0-Shgai`M+KD)W75@*fSXD{7SCBEFoJlyIc;^ny0pwV#)Nd(?E+u_{9 z0Syxt*HBVcbt*1kPjTN!$y~bLYYwaMomym8ONn9LwlMGyZ#i7`wSKQ0(W3MVI4kLi z0<2!0{?k8l%z`?s=SKl`2GkqSU_j7-Mgy7*Xnwd39~ssn_~=&*@qY)pzGCQBdG=$0 zHI%Q6gu499vHktx`)~F%D6}vOY@_ zuf?T?5M1bg$aCKkrFRDDCJS9vU>sR)7pyo#i_W1JOg4Cu-cmFZ_bjGj%UE4oTPt@c z(_~+fg%%W7eXeY{h2A32q=`2&M_R;ZNtkB=(L7r28$PD*RRC+gb z9j=(fW=U0)T8pUYwqoI@I;f-^SwHWNs9jnkPJOxl2}k62?5AvO?;__bka+MM0Gc46prO6bC*Mt<_DdQ zeEL#>%_&-JX50peTQ0?6f;2-=0W7y5*|kb;=#fahx_7kHl7e4XpmBk@YC5U z=#W4X&PK2M`Vx%f6^+=-S6yA?`V^ws`6#7-X)A`vsZ8PIkKR!2ygx$Ub5#^tnV@)V zf$enE{gGX)Y$0k-J6yDyRBP+aIP(j{Lq1!b^mH*RhixUUze$gIJ>9S!qn4I42;F48 
za+cw3swr``VDG!Xoz)W34(H$!da2KCtM576tm1%>{vwPGpXVl9MWDErJ9*mjm5X}% zR>|73Wy{YJTPuoX_+c};tciVjmS&snUj(35SpX}BG2 zo?*C@r-*cSrCE;4{tMlN4VbIuTg_TTnN7#+wlrhKS%oe>gu8Hw4OS}-^`RwiZl|FN z$(&CYIBgC15RI;@hjSnad!+gxQ(Z{d>Ol2R2;(&S8)Q%Qe1@@;b|F$6z#XZGqWjOn zK|z4tCH5=sjBTudZfqZTO1iU*epQt!C#_H9l%CTdRUU7j3U?q3?}};g8sfg9uYJo} zu_2P03Abh@74!6NIdOrsx;b;2^wTM9_Z(VPLRx3+z}I4$yL2iE4SizOllU-F3RYMR zZ?SA?#2u7wx~r1<{q!xjZRzc81P3C2Eum%?-O~9Nh1jDS?h=H>oBY)u;nP`hpq4}r z8Ly_Z=vR>7Y1-UGugvGQcqWiH5$5@#65ov0e@;_u^wA%J+vv##zV|otUm9g9s=2N^YKJ&KIYkz$=K4_$J3@}_eh1< zY9d|An(8le=!@MXU9sv|80N!!Jp=UKMrhcVAq$;SGw<3zvWcXsKP@Yo#9UDI`K&+% zaV{&RubEDQkbyL$FPB=X=nc|IRiyg1Qt|CldU@_iJ~$WY%SmPBNzmcCGA7fSLM(o< zU`-{Fi1B@GSkyd3DZM+&>==2PzUssQl%7n8*Qb~hNrpKuP)X?O$(@yCXshRQh)BehiprlKigcrp&01x0+u_yFWpzWQRm=Yv3pp!~+% z-l2Q?u8>ZdJw7xx=&Y=vq1>^{@PUW;Sliz9<_@@_hSs>sM2m6t1pnhO2F*P0Cg=mscJ8@!oG|~#qEX4e*$G*?rl##ABt+?3oY~=-Q1zVHGHotEe%xn z*TM@&$+65qqWb@};YsmtI=!5yKkc+lcm8H3o5+m)jw;2hs<>`=pJQe0LCRsWcxMsC zk)rj28K>JSZ4LOUX`;=2O)H157v;8@IE(HI5$1Aw5lkELdA{_5X41)AdW%Zp>j`tW z)0P4!#clhVi>n_aMVE|U-g-6eu2NT&SL9&t%@~(cOV-RLsVm5VLubvyEzRoN2$|Ay zF$0Hm%a;t?!i(NxTArRzb;vxn`NW3;H&xnp4Bcvbz3*iD=i|({==&kLnVi*eW(Q{3m-MsjiXAzIT< zjnS8nlg^w?uQ2tWV{3PKJtfunD~n;N*&>ECmrfsPr&l&h&3W`1mo1D>F6gpuDGy&8 zl#c9}v1ugV`3XJ!z7uyS)rCqJ_q?E+8!PExf%K?M2g<}Vis@ekX@NlWe%GRQ=ET)S z^}@c9SNkvFTW}z|j4-7nBs$v8T0pM4l_3qrFO=?e=OTl}g82p3t2*fP1)qr|6>ML0ujJvIQlaEi1u4}$CG#~MX~ zZ4VP$J!xEkaFYSet{dWuaYjp<8t8@5y|_Y|rf+oc=}e4v2_;4JPP6+)bM@a@`m^cg zwoJIaf1)*-Xj7uDY1&6syKr-$2Z!3YJAk{zWi3pF9hVrxa*O-o8U1I8+xUuN1!k2U zTSGnB^8P3t&8P23)KS4EeA0)r=q2T5TvrPTd!08>+Su*R<(aR3nd;BM)e`9^jcun= z%GuoqTj*)sBMY6CJf0HqC(RPfSGc#B5dW27=GayqIMx~Vx(02hTazkoxl$N{|E_?2 z?~?lCr8hd@)W#{tXN}nK)7v|-U2kf`K`1UNi09YR#k{!8F+&o6t<1QgYTOZwP|{F+ zFH7HX@Hm`cZlzOm**5%5HIp>WL_g>%Oe;7g)Zu5wH$c)YyfdH2$*p*T2`i8O)WqYe zQPu6Hh)U=el1;__%yKR_6NaJGZI09vD%foG+)o3~n@ISqdkKAQ-l7?VTXWU@n7A1@ z$eL1g5f@tYAgQjfjow8^3Z<(u_4w-nvAFER6&c)IOowOC}Ak_HwyrXRlQ<=G6L7+f{^prsl;3Iig5`iRQi$Ryt!Plv!SRoZ6;gm 
zZ}AbgyC4ox9Jn1&XF$CH4F&`aXf&Y7faYxgzt|J=zute>l83Rsxc!@ezgt4r=Cxk! zY?)+g`J0vK&bg+cJ4?k*I%c(f?u_H>hv6@JuA%vLg>ri)-2r2Azp1N=(4__8>8#MkF%eFj_ev9dVsDJ>2tQ=M$NUQ>SG&?I-SLKXy}!V_`vbh z`jO9I0+;7i-7+b7zK!Icckzs4m~(9RTzfWcnle@!keMx|!%tQm5L}ZZ6{M)MsA}ZC zk{dD9&gD*CBZOX3Z3xuyStPvsb7|VKmaFMO0d9f>?of-5OqGTd`p*)#$6hg^6uzRM zNqW^ZavHwO?H_W`e{@3GALyhX=6QCETvc&#Ej`A8ZN|o^GWPxzba!smhs~1KW`_4g zrZb`>>iTuyEBpxpvG+vd6x8Its&m8rlxEiXu=bUd%}*~rPAu3X%Vx2 zXC%GlzIlrJUNF_!8Uu|y!*Tl6wj^9Z#kU(-thGa+r z#7Te{A_*iwSOg@2uu0fK#Eq~i2*@H9cUE^>#ih2jt=d|b)>dt8wTqQnajmxMt6EzX zt+w@9#V)VbD(`m&m-61%_dfrBzrUX!nM@{k&b@QzoO91TYfc`aJSo*~mEuiqzTwe( zku4ZS&qg&furw0qhG{38z#?Y+yHqsPdBxY?aU8FZ(wAcbitIlVtLa}oK{`J^%={_W z*5pNut_ViPZ@tv5;L!yUpVUdE(MZ@`=+E6*J4k92K9PZ`A@nY#b^Mo!$o!!adwAp| zBWiV~W}2sSb!<3!kDWF=(zO1XU~}sq`T}>@Rp(?X&rAf_G#R^6c(xE%3-wg_0Uns4 z^)932x`YfzZf+o6;`WIHLc$QHLbQ_=_2Ios^;67PV44$*l+7B@qX(yF zWl={Y)Nc@zoT?~Jt0@9}*`0{18ew0jxmN@tA)neChlGWU&0c9#P4S~n}gkW!-pW98$K+kzKW8EXwWFpn@;Ag2<|RiFJD7Ng0R?$OBuSwQ$StRP%6@yP*h_ zNkj3?;rRPPSlBClLA+h#L5Qe|KsuvAn453Q#r{*R^{r9Y2m*V8p9uqk!Z7)GaTzjO z#AUsoi9rH#WKx_QxW2utBnH??TnrNDoM9NL0{s(_4?NDH$&)5_N z)Kx8r`$dawzM;MAmCAfDjya6_aE(6Kp zM9Hd*(LdCqbcGG2?X@9VJB>AT-*4dCa}%(9f+C~W4rZS1Bb4r{IgJxl?`p^cV1lFa z8j()>OIgO6vf9Rp>nDRaxbf+%-EZ8FkqikJh`=PM1IK`x_4 z_=A85w2pnV4xh4&o59bAh>%ViF97YL{^W3QmQJQCsE z4HF(kTG*mEQmm0wxIlY?Pu&;cdSRR#QRmdH#c>LxIxTXp(EKQN{1nh^e9f(UnIZsx zPztzcSO`wTOgdgx5U_~{11u8x4#P%JohXb(IiW9(#}{qFkr1hbB?ZVp@AqhRF^}hk zi&bAYfi)Jq&+%jra23VvVvWI0)!3=8+#nS9kdH7YD=Qb@Hu|JYsh!f`tZbmDYev@0uE6E!ohP9fcx)ZK(?ERM9n0iYFa)!yl8Uu23Povv z3-%Coi2!nKyPG>eLa=~kC5$~p^j0cjgvkkv$p(fNGkXYQ4-uf4Qop3tB;m*s!3}UG z(>#-f7>$1lGAQ;L^~rqjhNk7oV3i%&y*J=J_A?-z-lpu{TYJdUpQ)SVY^ZUTR+fWx zz$(nmOL0&Yp1vNwpdj-kl{Pg86%xygYXTIfVR7u?8>ewtzv}>Zv(N~B99g(jp^T%< z=j!lsDRwQA2Z0(Y*J17bJ6>AayN&w5*Lj9q-Z~EJ_{wA(aR#tyLMvuWD7EvlQoP zYJ%;1R1mdHs&(-3FsyQF7@)5dYnG8QQRHMrC?1X#i_{DF5R^vGsH3T1&0V|5$}S<$D3p3ydi9?u9kLLrTztSwq!@)!AMmMJ-mTXS=RlL42P&c*YW7TU z5Mz?l!QnbX5(;;{RIhwTq}~bsHhA24wGjuJ@tYGCo`=ZKfN7H3%0r&GPv1LQ3QHsC 
znAT_D-57TJvt>v}NBT%i;B}mA50p#;(Jshc)GINAs=9-(bg$Ku87Pt}t>r@;UH5aJ zt-z<_sAnJ#30-F98>|MpCK&uQsQsaMyM)?aH6H4w*wR4cT;moOf`kTeG{o+0xO0wi zEEvp%wjd-}N`}JLcfX$z%=)s<(~#1Iu889?33#lCx;&?~Ij}PD{caQf+hjgCgW{>h z-9LcqDAs|EMQO^XIKt%{EHfaP5jA&=b6tsgz>lZZQotZ=A5x%1y+dGSyMSQ{K0Xxr z#eN_PQF1~Fo&C0u4Ti%fJ@p4Eh+MZ)9W$tI2};4=HGxMASZ%=rX}MhP3H}NRnOS(2 zu+t->J_i9XnwfSuYkOBqRZx8U!6t3i3iRm$ zqc5NAAKxedk0N@nJFDi6HW*(d@JXFQ1zwyyDo#MXJTg02VWO4}qvj67K3MRTa%wnv z0?!vgqAXlZfgY0buGzBH?x)P-V+z7@ATt(t>ND1OA8<-#L&WV=dkreMdguOcsND9e ztC%UU7D9YE7>rYLah=!G;B%Oio^Qq8;q=MCsL2q)gqJ#adHKq_?$(QvyI`Bj$1NSe zoI_FtzZ+AyX`pvws3Ze_|H1^7eIk`vT#$v70tT|LQXdKQ6!=RQKOKq4df!NOuHqZM zDLkN6TjayRSL>*AA{?m1EvBj|9Agmbepa#;_lRIANL$^kEFB^QvCU)D2GzO0@4i6hjbWu0i-rFs_TlTiryTDGx#lPAut6gjr^=pCuL|EvTX(8N5t~ z60|uI>(00wc>^g%=P9HiLlo8S9WMIZuCRrO-g5wQUx)^b$OPo-D+3qelG z+M<^EQvst?*UMT{L1{xwkuZ>li!SW6_Tb#{y^*ng#H|J}L776rhnm+B_+Sf>t=gf) z|5Sv5eJvm8n@Iq}9^=5tWz%5WpNhvB;zAK#j`gzwmC|JTV@xlXgVxXdv%g|7N>p7` z(R;F~3vA?A1}*5~LjWxGyUGN*ds#3J31uBMK!^O4J0QLzzrYFvD@Fu<}SV_4XM+b>@ zxiC~JJ5tsUu^855h;*`3^HmTXk%HfyM19KO&x`R0xsXY>Cu2dVR9BFj9!L`Hd_Z!_ zce3Q3s;}8NUx3WL?eJl&a{*Tx6uK7vzpUTcNaZ*8#OSg%5$CukN1 zLn;D#odaLf(pI-_9x@Q5>JK-n-2rL=YM6kZkeTR=7lPy+2+=RfFPl-Y>SD8RG81Ep zg!Q$NF$m(3K{+o&S~{i7IkB9y;#j`+8XsQ+!4CHb@jNAdOClNzl8ng)$q!6)kvph( zkePK@2xSjp3YBJ?L)AMKM6@p)!Xn_u>bpC zPkH;~G0oG%BKS0g^e~Q7?~{R&WzU(Jt-7oE~zp|GDm|f zHEOuM98j*p0s14VvKkq1VhBp6ljcdTM(w3;YS!~4p^#J$vKF48$9rIIBfDb>z50zL z0CJ|_A5?~`aJMbk)QrN~dblWj+%C!;Tcc`qJY->CZv=aYWDimBqOrqmk20V`HBNwE z3*x7M!q>$O2Z)rv^YPqKV4gK(EASEzY%Ai$jekz=2nJUcECDX>WIo&qOHM{E7RFmyjYe<9Sen*RR`hIE^DRX9>IC)a7SUJ`BsY%DW0L_2q-N7G7H?PRPk|WZCCX(~Dr`9kQ5pV@Hkp zYmg}fhX(LKh%Jjy5@N`LM)_fgb|W0P$y>E=IzB3fz$JSc1luQc`{Oxrcy=9xHaj4Q zqq1sal`|Mmm#8z?Acu_PnopG@S^zD&l!}1qD{-}8;n=E8vCe~Fvh!1gTKz?Stu%N}^uuf-o zevDJ}^xnA66z~a<4ZnJZHNG@Lf};#Imr73nGGbB-#)6*cOE1tl-Iy7`#n+{@B>*1? 
z2frsgVy+Ov;&ZTHKF`G9{WP+)*7NOok)%i(6$CEvHrQei0+d2PnmPE(B1m2#Lk$@# zTg(SD7Ve%Mz+_(|o7D3te?J_{?0ON@uHf2_Z-n5FRrDNq8pdkOGvX!>%5OF1QR5vW zaD=!a3TdZJi?$LFfpk82GXCeUN%4 zYk<#bYV*Oh?dqBO3;J_D_M;o4@UzQMg0eA2IU675sF04Tiy(%TLketfYLN(5h12tq z)vg7j%DK5Hoje^-qs%SS6hdm13~~{&YMzf(wd;_M{L~1xnA1Z1hM!=10n|#%#avHR zxDjL?YHB1%>wAS{Bt-z-Q141@yAfVyAp>v?#_vf%%iX<})Zm&Cu&pzBKnP>H`96~Q zX+8&!AF=X9pC6uW6__%$hGsg!E$|IihBnS1&03C36K6*2f0K;D!{TvBeqc-B2b~E4 zJCtF50u>HZ^pZwUPEOK)Dz?T__HbpmNS#ToNdk(^f)rVd5=Y^FHzCMx3HZiT+Rq)5 z7u#`w8h=m0`^+&C)%bO>+z4TesmZDMT@QY4;6&f*oKKnc-}O&Mg30S$l=n5D>f0{` zm(ScVAMlt3{cLJ3S-x?qrcnicse#WeTn8Z`;PZ=tgS+787Y8h(M%&;`yd4BtWdSb! z=L%!$Qm|n__5v!?LU%cM`QSrKFZAMrDq-&xrUiA|=rewJ93O`+1mD+ms<#f0!4*gNC}86W zY`22{zYVu8k%Buac!yJ|5&CcRkmKeIhP>aIAo=LOmQhW9fW+DRGP}|CH+bp!c>Myrl>t;4u9cKP$_hLEzJ`~XhttaOACc4>UEM!BA#V=#zBfNL z-QIohi(2ZM&$eGR6C(wCt%0ghQsOo!I;jaNJq3;v^yVmjdafTHLTQeBesj)pnWr}C z4^zk%4e1CHoTZS?*OA#;p{f|~%mqz3N+b=`ZiCZ?p)!trV>xxrQ@!_hGhQR2PDDyV z@T-3KJrVIw{G|%_H*Cz29e~KBobDB>zk87h;ATa;rYfWOy=E$2z2!a?Hi@5;oo^10 zkYy1DynPJ;Ns7(A9|OT$iN^inZ3l~>!%&3*7o`W>!2zN2pa>Jq}5s^7kR&XpZBbp*;B7=lH$DC<5{jq>UMe0?nIsC0VW3DtZD1YEOYr zP6mV`OID_ZLQe7-4B#z$2xSjp>>(TuJd3CPGrsCeF}ufgek{y`^q1QBT#2tXZ;B>U zZJlB4=?yiC|>L#wx&a9C>4;#<0AZSHSAh0yZY6SDFoto1Cs~Diz8!uugIt7OO-!gn{T$_(&C@YxL%<*j^w2?#;vK^B3<_kur;SsqYPlqo#}vxQV}1I z)}aiVqz?4)69Fmi3*$yXNFkG$l|U*BiV^-@C@RFQ0?pQuxRkG&I1)CKSyqa#%Lqhx zG#~$%Z9dOII-)7yUOgnKid&RMuf}*^fXvU#CGb5x1e)0cix;NjQxrJ4)qb6ZU*=07 z*Ip#qc!|iAGG*3F(TIRTFU>LuI-lgZ2nmZNVBMnUMCjup$9zR)}#$6fX_o zd>Y?PDnEy(3!UHR>=lmBMViGB5|@251Bo*9ON=}w7{@B{AFUAOGJo#Lbto249Iq4q zBOQg05WLiS+%#W1XQ5`2M0>9q(;j%+A_&x3GTgk}Oef~x<&q*b2$gv89DCyWMWO)6 z2{mD~C=UNDj8-6p1*^2+!Jj9XA&DYJ60&-uW_mH+pHH09yk#mkQ_CbZ7w|!`fW{X^ z_&piecCgnL%&oyQ+z-4DO4P|V<%~_R5fg1iC60UjkZgGZBKzR&VG)guilPIEc+Es! 
z?m?)uq6ghAM&^SE?~l8A5D5;l{W@XEMW%cBGY?gB$qY||y@qSt9B!#z#7}^t5aWu6 z<^}X?-fqFK>p+mER><_ZZgz)oe+!d7BLa2j_(s?litUB4xKSaC@JRtU_xJARr|e%z z<8gEo)HN6fQ8l7j&Ka7HDdddsesJ4Q$jx)~E+Z-|pM!@$4xaQf!Qf>!nKiC*h$$f| zUXbrxPSzOoB1j$Z^Bv>x+Z$L#^!Y4VENEnEW5JVol5Wp(r%H#2CY5xE^iHu@@LGso7=xTp~#X%YgLVUZ_wCIHfH;4@~3 zx_Pmb(89+DLKBG#gjY|9$08QOW#4Vkf^fTjQGMfOl@y_RvrE*1}cibx9_rD~@nqa-!t zP&}6}Q$~L`6(bQ8tpEv=nq5LKHxo6YWmHdq0@6mPUU-i1!^&{wb$@y^w$^|vy#p+I zm;*@z6wpK3QuxR$tnjOdTn`aX*rVvW`L&J{aYMY%F4=XDh*Xr zni}A1Ig>D{vZ1MAVr6-)q0Di`c!#e!=A}t-LkxXwCDCntOY(JhEB0U)419eK{#Yj& zZtgTx4MjrGE}7JQi*g3e%&!fDw zKN38P(09ULDqOg50=2^3har9oa*1!@$S)y&N(+bH*Wwa+i4ZSn;c4Sr-3l*p?Li| zjh6;~$z98lZOJ;IFMQBQzZT8iR(D`nvBZ7!u{L_w_5+5xES6?QPB>5x_2CGHIBP>bph zB`}m^VS0x~w&RdJq)W22y2H=+_Mzl>idw#;!#}`uI5n$_TJ=UpfT8~I$YRR#xnmvL z@EwO8bzPn>-sl(_d;4%^3ng7I?9|y!N3xf7NjC;{20H4GC-EmA9=uZ zynfjVpQ}Ohqdu!Y-nh;a0yySJ|82+drtK?yf2^KA{F~dypW5fCy1it6%vIBg>7TDq z{rblI*dOap%)I0|cO}6v%&XARAQO$w`-=998A1~ENmM%yPFn_TyYo$iEZGqj;@WtX{sh{Gr z1xevMzgSwg($DwD1s9@#Eg79#^R`swzfra7_3`BEg@>yO-r2fpTW#m-V}Ga`_2KnZZ>9V? zte6~GxL;GSV@Xp`S-{BAM@tIcUi)j%B>TvsQyU6)z0qW-E+0AO{N;l8KK#{C*Ew?R z70swU$C`{&H;x?l_mWW`eEqAj`M}8WKW`ZI(T%3ySyvZ(Cay)Lcfa!b`0mxW<`O~0 z9B+BITgCQvb&%6-jC{ZH+V=U@pc3i@`2l^!j>XQPQssI1!RTu{o@os#(|9Wmr&a7+ zu|B9=KSpu1=-SR_yMrpsFDO2%s(AZ_-k^!m=M^VrTzmUv!Z69^t^8t9#XGO44V7tQ zl&7A*_Rc1&p(^JE<=?he?0Vg4m|S#TdFK6VyWVOwRF`;rpF2|V?%V4PHC1E0&wq98 z-S@f;wGA(Le{;R!y$^Z~Q)Zm^zIgZAdmj_Vx;Ag0Ubl(6_p6Qdi^lj|QGU1kpw-yW z^@7h;{lq;-oyNxJ&-;8A{oS4ut;VSvynTPznH7ENMclL{_nYg5XQI!1gs1IT@y(4% zqlTaV20yju)HgSq&kVnKA2)Y$MnOw7k*lECg!_Xj~QRAxbVy7QL%p? 
z=P~o#sSE$ueJ1wSY>!#Jo)_;N8Ws2RiypIo$h~;??3uXVKJu7zd&R|j-;Wyc$2T5x ze?N7x|IV2a|Ge+fLP#$mL18>sC2keuUE(Ot#=FLe+axP5@d692?&HPniqn@|qRv`9 z=7>Ag(q4XQq0RFpac5v&uiNOeHm|+nd8U=U!pgz~?+fDjQKx(H^s@;=`o#;Z(#zt7 zg^7MD$->mU%hYpc6Nko07Gh{yk$6isf`Bvc{tZXWoCLPLW@euNeWUq#>hk-&{ zu=|(YA!PT6e_aEKC^-R^Sv0FJ=HV*E5rEDrKOelW{SlWy+c}7vFN;Ki0wOP8hO$sG zK_Lsl&C7p@3-KK!bdaYc;)r42=b&bSn?+a<7xb8rDU+L(4@H~u^1mhs_q_agxII5H zABmA`J|aRl_B+c{4(+45aO2iW-h|JH+%a?*nFgG8zWr8g*w+IFy zg|1|fzJq|~FkrX}g}De+n4M4ZvY;ZD9l8WfF?0hz#S=)2px{+}zFjPaic+R{P)xyG zULs`g5a}?bFq?HT>>Li1Su=B3WC$f(cu=3sg=Y$f0x}U$n~ma`Lr5XuN-$f&4TZs9 zIQ*GN$m&I~b=*jS1t8F2<_8c=18~CNjhy7mKI9L6BMZ2?fvRm_gh5+)zjzT~gFv5h z{;cMeL2&Q-5Uz9jFD4P(eu{9FkGMODAaCv;OH`m41ozSff)M@V*1fsS_tN05E-2u2 zhppzdldS=EfkTk}b4f3ThdRV1e07J|CWJ%j5)*q+e=4`ZnS9`yjj!gbUtO7#7PxU` zHXNkTh;b2=%Hgv|UfLtSwzlzjR5HK_(fxUOi2{C}XX=4A@mju!3&547525tP12=}v zQ>KYFo*n>FB;^-yDIwX;!dqgp^vEIT20mR%H9~i|cvfC5gp1Vxt2n5GZ4?53a086! zq?{O|vla1OtJ!PSLSjeI(pIr;E5VlmC^?jgI$KQo!X1{LVhB&&9kV3ryS0r^o31Ts z?1_Z49wP4ZV?|R(lq87QfN-p^m zT-8e8Z$rQxs{zP(5z#>)E;k8rL#v^Y9L{N$o{n&2*MXj**`aPEq%8WI!-0ZA&5$+8 zEO3#A4$v4WA%UtQ2?^}C%}_@^40<&IA(;shxJLvL0wf2QqDXgv3;YOm%LTqbbx@Zv z6sWKY3N6BJQ39Fe&4S>NsDWM{kQzHDcQ~>Z?qE{;T%u!{H%n4FUx?|UuU%jzy7vRsH(=(1og>Rht&XaLrV9kQWva&ux_K*L73(b8`J;DX2t^mcnV5Sk43VBZ8Yl|r2jmFueI?;3?jOi=;EMDL2R3BT2LfUzVF~4OV5M}G#RKCbbc8^f4+02Did9fK64r11=FM9-w5d2j zhzl-RAQMOl;FR#pwl!u%au6Rivnw(pV1<_Ep)Y}D2taP&<^xdpnL%1&7;hOtxCHGZ z09xDTtsLaTGL9TsTDlCgI1>1PYp^xj*pjI1A`G0uCB!^X3bkl}*x40zFDW}j4u0JC zc{-v<&M`&817GC)GM;!9PTZg6-WzgJ2^_qSCM5lHz4({Bmb^^C`+@xA@WIy2ByYVO zMu;x-2j8084|LJL8W29NUrM;%xj0t?-}3+k=Jxv&g8o}s{YJov9D=-bKORs$oe)H> z?9WPs$r-52IY1%Q11h-_0%S82NT%OHc;38!|KlNjx8Ca)vm(rQ9xWqm1bKHP5Ys&n z|J+lU)GmLb#XptfPjHx24Ma$gRtiCm;z$Te>EfYf9sVnLDJflLwyyXte?7bkDUX}d z<;kpLoP}h$oc-U*@3HW&V)DKfZ;RT8sdJ{XM($SX)f+X(ojvg+fUqSpb4wAPOUEdFCi09!_+m5q>s; zkmm~xCczaId@qA@A-J;%Nq;D$PQ>uS!--@vm=|t_&ddW%p&uv>6B-Kh9Zp)|PX&Jz zFEoT04n^alNC|umMp1%f=s8M2kq&Tqfdb-W8~j;9!XL_x>|?Pa{WXE?kYF&nBak0M 
zA1w7h^XeYXDXT!^eG($nbD^At^gxy{AzQc~ST`@h1T?olOBzJFCS^H zlrAYn3KnkAjocAJ?hiRegh<0WuS7nbOAPJ1FpK1#_&Mgp&(dfhwVz|)COcsjzY`?R zi-&WPC$ncWd@40t?eNS=$x*AN0jXVK>U=VvE*njk!mrW;O-^}%R1Ae<+n5C#bo>oE zH}-^q+P899^AEPgS3L=`lWWg3XKA|uT?$| zL-HYAD+VUuK!E!!yOV}Xkt7$u@N=XL>6*4)cY1U>j54iNx~PrVsFS;S=!&F)TXkEb z34@xCveo*&=Y=c$1uH^QOJw#Png#ywq>%@*r8ZJ@bZ5|+ol<-KxHC<%GrPu~X@Zk+ z(8H)~DV&UgQlsLrV!|Do9nFy*%^7=iXZ!&;-z4b++s5p+ zJI}wRs~8QhVfMwRvZ0`>imhr)_>0XX$!{YBJhDwabq&K8?f%jvY;X4{jInpim_zL3 zvhSj8_nV8eq&W=#^83aoN}-RJ)SZNE_rH+)X^^-cuBoH@3P%D8xA72naSRBc{?Uj# zUyR7sd$2a%xErbmI(Lsd{(E!#JFG-m0kjWEon5y{uE>c?uzYZrz-1srt`oQ+*@=S? zLGI&2?FguJ0<7SWLiW8|x~v7b5klVz$4a5E3eeo|-@;UvGa;s6_zxQH5Q`j%Lqi~% zn2;}v^oK@S&PIkQV=8MarhuNGwWlzRO$|)7lc{ScZ=C8RC|f-Ehnk7_sTKva&vjNd zf^Qd?1e(|kcT9E@gx6J6H#J9tpkk7QvmKbB12beb4}&j|*9AmV=-U)6M<9@g0~;_| zLXbElA~+<1gx18W$hz`J@dx0IlbSeu*4(TIur>ogaAC0 zD1_wQ034pm6asAog30}HYdMnr zF#O-S<5Q5}JY>!@Me3ksy74Zznu!XxL@@BWV8&|ds-30fOr5iVF`2^5fsjQ9DjNsc z3@Ew`<|0(E$2JQ-gDc@7)OVo}nJF%WW0&(nLIo-m25zMmCzmKfp+z@|a#QpeBGA+M zmZgzUZ?b>k6V6Tn6x)jK;gY^_^`!M}A{-)H)SJmw(Lp>2I568*b5V$)FT=!LAH)lZ zvc`aUFM1`5KtsXkh$4G3B1f*hd zI7%-5ckF;s?dJRvL2Q5%S3ZZQ1X2Ov@)?jTB`PHOxu5umkS4Yyg1kU_+|d!yuytb? 
zkmE#z7}|l_KtO!*Z9pXQklj+=<8ny2(RCUS9f*L0*dqjy?c6MoQIim^GeZ51FF_joaXf4tbH zmbw7uyG zOqXX}`Nw#Q!*44ey}G>e`PZJi{n3egFlWF({<9vCU3HBz^S`dSg!#de>-AWkz5!AU zfzAS#;-KpyB;zRVNmGQ4fQOSOG?q>(uW`z>NG1&6$X&B?awkA`TJZQ9`0oDj`vkCp z|Bq`4HuL|u26=uTUjsN3!0a|5FEi`$wY)?J3x9$&De3XGM}5F=kFT+960*`KK%Vu- z*T6w$;2Jn@KfacglQF@Xn)ok$u<#&6dhj(@7{i3z3`jBY@Hz8F1FS#cxF*=sCqSl* zN58wX?FZmwKECJiT;wGGIVQGUUQW`4%&b4}ee^t#7wr)|SIEu{{iVZq`nadUM{&sbjt?J_ zL;u3jLL+v74|L~G0CHpLU?79=_W_VaB)x{zLZkl*q+`WkAde6PfIR@R%$+{&riDf^ zmOquQPZ-O|7YBnGL?aKt9O2L>Ia+8mFqQ%7<_SPvT{jrWAfkEzEhBaakbFkzYgZhZT|w`0fTq|;6pCGkE?|S0{{jz7Ec(#>K%gtJme@309<#W zZ@6foL5$^39l#SnwCx@QVlWeW0OnhQzDa1I(SLoAzCDA1JVG|$S`UDH?LprZX`vAd zas>n z^q85NT4xRW)5C{`-~Mb{SvwKF*EUsGJ4>pe5!_uKqMO_5=tVWr`b f=903Au+oT7XCTPVK2Uq{Fa1aNgrHZk=9vBuI?8m% literal 0 HcmV?d00001 diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalActSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalActSuite.scala index 1d734387..fdbf650b 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalActSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalActSuite.scala @@ -18,7 +18,7 @@ class MedicalActSuite extends AnyFlatSpec { val category: EventCategory[MedicalAct] = "mock_act" } - "apply" should "allow creation of a DiagnosisBuilder event" in { + "apply" should "allow creation of a Medical Act event" in { // Given val expected = Event[MedicalAct](patientID, MockMedicalAct.category, "hosp", "C67", 0.0, timestamp, None) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/NgapActSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/NgapActSuite.scala new file mode 100644 index 00000000..5ccf19f2 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/NgapActSuite.scala @@ -0,0 +1,27 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.events + +import java.sql.Timestamp +import org.mockito.Mockito.mock +import 
org.scalatest.flatspec.AnyFlatSpec + +class NgapActSuite extends AnyFlatSpec { + + object MockNgapAct extends NgapAct + + val patientID: String = "patientID" + val timestamp: Timestamp = mock(classOf[Timestamp]) + + "apply" should "allow creation of a NgapActBuilder event" in { + + // Given + val expected = Event[NgapAct](patientID, MockNgapAct.category, "A10000001", "9.5", 0.0, timestamp, None) + + // When + val result = MockNgapAct(patientID, "A10000001","9.5", timestamp) + + // Then + assert(result == expected) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpecialitySuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpecialitySuite.scala index 17c029b0..5f45e625 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpecialitySuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpecialitySuite.scala @@ -14,7 +14,7 @@ class PractitionerClaimSpecialitySuite extends AnyFlatSpec { val patientID: String = "patientID" val timestamp: Timestamp = mock(classOf[Timestamp]) - object MockPractionnerClaimSpeciality$ extends PractitionerClaimSpeciality { + object MockPractionnerClaimSpeciality extends PractitionerClaimSpeciality { val category: EventCategory[PractitionerClaimSpeciality] = "mock_prestationSpeciality" } @@ -23,7 +23,7 @@ class PractitionerClaimSpecialitySuite extends AnyFlatSpec { // Given val expected = Event[PractitionerClaimSpeciality]( patientID, - MockPractionnerClaimSpeciality$.category, + MockPractionnerClaimSpeciality.category, "A10000001", "42", 0.0, @@ -32,7 +32,7 @@ class PractitionerClaimSpecialitySuite extends AnyFlatSpec { ) // When - val result = MockPractionnerClaimSpeciality$(patientID, "A10000001", "42", timestamp) + val result = MockPractionnerClaimSpeciality(patientID, "A10000001", "42", timestamp) // Then assert(result == expected) diff --git 
a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala index 1ba30c1b..8fc16072 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala @@ -254,7 +254,7 @@ class DcirBiologyActsSuite extends SharedContext { // Given val codes = Set("238") - val input = sqlCtx.read.parquet("src/test/resources/test-input/DCIR.parquet") + val input = sqlCtx.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") val sources = Sources(dcir = Some(input)) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala index de1e8797..1201e471 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala @@ -3,10 +3,11 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts import java.sql.Date + import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.McoCEAct +import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCEAct, MedicalAct} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -34,23 +35,16 @@ class McoCEMedicalActsSuite extends SharedContext { "extract" should "return acts that starts with the given codes" in { val sqlCtx = sqlContext import sqlCtx.implicits._ - // Given - val date = new Date(makeTS(2003, 2, 1).getTime) - - val input = List( - ("george", "coloscopie", date), - ("georgette", "angine", 
date) - ).toDF("NUM_ENQ", "MCO_FMSTC__CCAM_COD", "EXE_SOI_DTD") - - val sources = Sources(mcoCe = Some(input)) - - val expected = List( - McoCEAct("georgette", "ACE", "angine", makeTS(2003, 2, 1)) + val cim10Codes = Set("DEM") + val mcoCe = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") + val expected = Seq[Event[MedicalAct]]( + McoCEAct("200410", "190000059_00022621_2014", "DEMP002", makeTS(2014, 4, 18)) ).toDS + val input = Sources(mcoCe = Some(mcoCe)) // When - val result = McoCeActExtractor.extract(sources, Set("angi")) + val result = McoCeActExtractor.extract(input, cim10Codes) // Then assertDSs(expected, result) @@ -59,24 +53,18 @@ class McoCEMedicalActsSuite extends SharedContext { "extract" should "return all acts when codes are empty" in { val sqlCtx = sqlContext import sqlCtx.implicits._ - // Given - val date = new Date(makeTS(2003, 2, 1).getTime) - - val input = List( - ("george", "coloscopie", date), - ("georgette", "angine", date) - ).toDF("NUM_ENQ", "MCO_FMSTC__CCAM_COD", "EXE_SOI_DTD") - - val sources = Sources(mcoCe = Some(input)) - - val expected = List( - McoCEAct("georgette", "ACE", "angine", makeTS(2003, 2, 1)), - McoCEAct("george", "ACE", "coloscopie", makeTS(2003, 2, 1)) + val cim10Codes = Set("DEM") + val mcoCe = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") + val expected = Seq[Event[MedicalAct]]( + McoCEAct("200410", "190000059_00022621_2014", "DEMP002", makeTS(2014, 4, 18)), + McoCEAct("2004100010", "390780146_00098382_2014", "DZQM006", makeTS(2014, 11, 6)), + McoCEAct("2004100010", "390780146_00015211_2014", "DEQP005", makeTS(2014, 2, 11)) ).toDS + val input = Sources(mcoCe = Some(mcoCe)) // When - val result = McoCeActExtractor.extract(sources, Set.empty) + val result = McoCeActExtractor.extract(input, Set.empty) // Then assertDSs(expected, result) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala 
b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala new file mode 100644 index 00000000..61bc8224 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala @@ -0,0 +1,83 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.acts + +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.events.{Event, MedicalAct, SsrCEAct} +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.functions.makeTS +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types._ + +class SsrCEMedicalActsSuite extends SharedContext { + + import SsrCeActExtractor.ColNames + + val schema = StructType( + StructField(ColNames.PatientID, StringType) :: + StructField(ColNames.CamCode, StringType) :: + StructField(ColNames.Date, DateType) :: Nil + ) + + "isInStudy" should "return true when a study code is found in the row" in { + + // Given + val codes = Set("AAAA", "BBBB") + val inputArray = Array[Any]("Patient_A", "AAAA", makeTS(2010, 1, 1)) + val inputRow = new GenericRowWithSchema(inputArray, schema) + + // When + val result = SsrCeActExtractor.isInStudy(codes)(inputRow) + + // Then + assert(result) + } + + it should "return false when no code is found in the row" in { + + // Given + val codes = Set("AAAA", "BBBB") + val inputArray = Array[Any]("Patient_A", "CCCC", makeTS(2010, 1, 1)) + val inputRow = new GenericRowWithSchema(inputArray, schema) + + // When + val result = SsrCeActExtractor.isInStudy(codes)(inputRow) + + // Then + assert(!result) + } + + "extract" should "return a Dataset of Ssr CE Medical Acts" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val codes = Set("AAAA", "CCCC") + + val input = Seq( + ("Patient_A", "AAAA", makeTS(2010, 1, 1)), + ("Patient_A", "BBBB", makeTS(2010, 2, 1)), + ("Patient_B", "CCCC", 
makeTS(2010, 3, 1)), + ("Patient_B", "CCCC", makeTS(2010, 4, 1)), + ("Patient_C", "BBBB", makeTS(2010, 5, 1)) + ).toDF( + ColNames.PatientID, ColNames.CamCode, ColNames.Date + ) + + val sources = Sources(ssrCe = Some(input)) + + val expected = Seq[Event[MedicalAct]]( + SsrCEAct("Patient_A", "ACE", "AAAA", 0.0, makeTS(2010, 1, 1)), + SsrCEAct("Patient_B", "ACE", "CCCC", 0.0, makeTS(2010, 3, 1)), + SsrCEAct("Patient_B", "ACE", "CCCC", 0.0, makeTS(2010, 4, 1)) + ).toDS + + // When + val result = SsrCeActExtractor.extract(sources, codes) + + // Then + assertDSs(result, expected) + } + +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala new file mode 100644 index 00000000..84b3af13 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala @@ -0,0 +1,92 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts + +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class DcirNgapActsExtractorSuite extends SharedContext { + + object ngapClassKeyLetterCoefficient extends NgapActClassConfig { + val ngapKeyLetters: Seq[String] = Seq("C") + val ngapCoefficients: Seq[String] = Seq( + "0.42" + ) + override val ngapPrsNatRefs: Seq[String] = Seq() + } + + object ngapPrsNatRef extends NgapActClassConfig { + val ngapKeyLetters: Seq[String] = Seq("D") + val ngapCoefficients: Seq[String] = Seq( + "0.45" + ) + override val ngapPrsNatRefs: 
Seq[String] = Seq("1111") + } + + "extract" should "extract ngap acts events from raw data with a ngapClass based on key letter B2 and coefficient" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val dcir: DataFrame = sqlCtx.read.load("src/test/resources/test-input/DCIR.parquet") + val irNat: DataFrame = sqlCtx.read.load("src/test/resources/value_tables/IR_NAT_V.parquet") + + val source = new Sources(dcir = Some(dcir), irNat = Some(irNat)) + + val expected = Seq[Event[NgapAct]]( + DcirNgapAct("Patient_01", "unknown_source", "1111_C_0.42", 1.0, makeTS(2006, 2, 1)), + DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 1.0, makeTS(2006, 1, 15)), + DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 1.0, makeTS(2006, 1, 30)) + ).toDS + + val ngapConf = NgapActConfig( + actsCategories = List( + ngapClassKeyLetterCoefficient + ) + ) + // When + val result = new DcirNgapActExtractor(ngapConf).extract(source, Set.empty) + + // Then + assertDSs(result, expected) + } + + + "extract from prsNatRef" should "extract ngap acts events from raw data with a ngapClass based on prsNatRef" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val dcir: DataFrame = sqlCtx.read.load("src/test/resources/test-input/DCIR.parquet") + val irNat: DataFrame = sqlCtx.read.load("src/test/resources/value_tables/IR_NAT_V.parquet") + + val source = new Sources(dcir = Some(dcir), irNat = Some(irNat)) + + val expected = Seq[Event[NgapAct]]( + DcirNgapAct("Patient_01", "unknown_source", "1111_C_0.42", 1.0, makeTS(2006, 2, 1)), + DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 1.0, makeTS(2006, 1, 15)), + DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 1.0, makeTS(2006, 1, 30)) + ).toDS + + val ngapConf = NgapActConfig( + actsCategories = List( + ngapPrsNatRef + ) + ) + // When + val result = new DcirNgapActExtractor(ngapConf).extract(source, Set.empty) + + // Then + assertDSs(result, expected) + } + +} + diff --git 
a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala new file mode 100644 index 00000000..52adf8a2 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala @@ -0,0 +1,118 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts + +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.functions.makeTS +import org.apache.spark.sql.DataFrame + +class McoNgapActsExtractorSuite extends SharedContext { + + object ngapClassKeyLetterCoefficient extends NgapActClassConfig { + val ngapKeyLetters: Seq[String] = Seq("ABG") + val ngapCoefficients: Seq[String] = Seq("42.0") + } + + object ngapKeyLetter extends NgapActClassConfig { + val ngapKeyLetters: Seq[String] = Seq("ABC") + val ngapCoefficients: Seq[String] = Seq.empty + } + + "extract" should "extract ngap acts events from raw data with a ngapClass based on key letter B2 and coefficient" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val mcoCe: DataFrame = sqlCtx.read.load("src/test/resources/test-input/MCO_CE.parquet") + val source = new Sources(mcoCe = Some(mcoCe)) + + val expected = Seq[Event[NgapAct]]( + McoCeFbstcNgapAct("200410", "190000059_00022621_2014", "PmsiCe_ABG_42.0", makeTS(2014, 4, 18)) + ).toDS + + val ngapConf = NgapActConfig( + actsCategories = List( + ngapClassKeyLetterCoefficient + ) + ) + // When + val result = new McoCeFbstcNgapActExtractor(ngapConf).extract(source, Set.empty) + // Then + assertDSs(result, expected) + } + + + "extract from prsNatRef" should "extract ngap acts events from raw data with a ngapKeyLetter only" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + // Given + val mcoCe: DataFrame = 
sqlCtx.read.load("src/test/resources/test-input/MCO_CE.parquet") + val source = new Sources(mcoCe = Some(mcoCe)) + + val expected = Seq[Event[NgapAct]]( + McoCeFbstcNgapAct("2004100010", "390780146_00064268_2014", "PmsiCe_ABC_1.0", makeTS(2014, 7, 18)) + ).toDS + + val ngapConf = NgapActConfig( + actsCategories = List( + ngapKeyLetter + ) + ) + // When + val result = new McoCeFbstcNgapActExtractor(ngapConf).extract(source, Set.empty) + // Then + assertDSs(result, expected) + } + + "extract from prsNatRef" should "extract all ngap acts events from raw MCO_FBSTC data " in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + // Given + val mcoCe: DataFrame = sqlCtx.read.load("src/test/resources/test-input/MCO_CE.parquet") + val source = new Sources(mcoCe = Some(mcoCe)) + + val expected = Seq[Event[NgapAct]]( + McoCeFbstcNgapAct("2004100010", "390780146_00064268_2014", "PmsiCe_ABC_1.0", makeTS(2014, 7, 18)), + McoCeFbstcNgapAct("200410", "190000059_00022621_2014", "PmsiCe_ABG_42.0", makeTS(2014, 4, 18)), + McoCeFbstcNgapAct("2004100010", "390780146_00114237_2014", "PmsiCe_ACO_0", makeTS(2014, 12, 12)) + ).toDS + + val ngapConf = NgapActConfig( + actsCategories = List.empty + ) + // When + val result = new McoCeFbstcNgapActExtractor(ngapConf).extract(source, Set.empty) + + // Then + assertDSs(result, expected) + } + + "extract from prsNatRef" should "extract all ngap acts events from raw MCO_FCSTC data " in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + // Given + val mcoCe: DataFrame = sqlCtx.read.load("src/test/resources/test-input/MCO_CE.parquet") + val source = new Sources(mcoCe = Some(mcoCe)) + + val expected = Seq[Event[NgapAct]]( + McoCeFcstcNgapAct("2004100010", "390780146_00026744_2014", "PmsiCe_A F_126936.43", makeTS(2014, 4, 4)), + McoCeFcstcNgapAct("2004100010", "390780146_00114237_2014", "PmsiCe_ADE_802770.97", makeTS(2014, 12, 12)), + McoCeFcstcNgapAct("2004100010", "710780214_00000130_2014", "PmsiCe_ADC_420416.2", makeTS(2014, 4, 15)) 
+ ).toDS + + val ngapConf = NgapActConfig( + actsCategories = List.empty + ) + // When + val result = new McoCeFcstcNgapActExtractor(ngapConf).extract(source, Set.empty) + + // Then + assertDSs(result, expected) + } + +} + diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala index 13184472..b5ea67d5 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala @@ -5,10 +5,12 @@ package fr.polytechnique.cmap.cnam.etl.extractors.prestations import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.{Event, MedicalPractitionerClaim, NonMedicalPractitionerClaim, PractitionerClaimSpeciality} +import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCeFbstcMedicalPractitionerClaim, McoCeFcstcMedicalPractitionerClaim, MedicalPractitionerClaim, NonMedicalPractitionerClaim, PractitionerClaimSpeciality} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS +import scala.collection.immutable.Stream.Empty + class PractitionerClaimSpecialityExtractorSuite extends SharedContext { "extract" should "extract health care related services provided by medical practitioner raw data" in { @@ -18,7 +20,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // Given val medicalSpeCodes = List("42") - val input = spark.read.parquet("src/test/resources/test-input/DCIR.parquet") + val input = spark.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") 
val sources = Sources(dcir = Some(input)) val expected = Seq[Event[PractitionerClaimSpeciality]]( @@ -42,15 +44,14 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val nonMedicalSpeCodes = List() - val input = spark.read.parquet("src/test/resources/test-input/DCIR.parquet") + val nonMedicalSpeCodes = List("42") + val input = spark.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") val sources = Sources(dcir = Some(input)) val expected = Seq[Event[PractitionerClaimSpeciality]]( NonMedicalPractitionerClaim("Patient_01", "A10000001", "42", makeTS(2006, 2, 1)), NonMedicalPractitionerClaim("Patient_01", "A10000001", "42", makeTS(2006, 1, 15)), NonMedicalPractitionerClaim("Patient_01", "A10000001", "42", makeTS(2006, 1, 30)), - NonMedicalPractitionerClaim("Patient_02", "A10000005", "0", makeTS(2006, 1, 5)), NonMedicalPractitionerClaim("Patient_02", "A10000005", "42", makeTS(2006, 1, 15)), NonMedicalPractitionerClaim("Patient_02", "A10000005", "42", makeTS(2006, 1, 30)) ).toDS @@ -79,4 +80,98 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // Then assert(result == expected) } + + + "extract" should "discard providers with a specialty of 0" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val medicalSpeCodes = List() + val input = spark.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") + val sources = Sources(dcir = Some(input)) + + val expected = Seq[Event[PractitionerClaimSpeciality]]( + MedicalPractitionerClaim("Patient_01", "A10000001", "42", makeTS(2006, 2, 1)), + MedicalPractitionerClaim("Patient_01", "A10000001", "42", makeTS(2006, 1, 15)), + MedicalPractitionerClaim("Patient_01", "A10000001", "42", makeTS(2006, 1, 30)) + ).toDS + + + // When + val result = MedicalPractitionerClaimExtractor.extract(sources, medicalSpeCodes.toSet) + + // Then + assertDSs(result, expected) + } + + "extract" should "extract health care 
related services provided by medical practitioner in McoCe" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val medicalSpeCodes = List("1") + val input = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") + val sources = Sources(mcoCe = Some(input)) + + val expected = Seq[Event[PractitionerClaimSpeciality]]( + McoCeFbstcMedicalPractitionerClaim("2004100010", "390780146_00064268_2014", "1", makeTS(2014, 7, 18)) + ).toDS + + + // When + val result = McoCeFbstcSpecialtyExtractor.extract(sources, medicalSpeCodes.toSet) + + // Then + assertDSs(result, expected) + } + + "extract" should "extract all health care related services provided by medical practitioner in McoCe__Fbstc" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val medicalSpeCodes = List.empty + val input = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") + val sources = Sources(mcoCe = Some(input)) + + val expected = Seq[Event[PractitionerClaimSpeciality]]( + McoCeFbstcMedicalPractitionerClaim("2004100010", "390780146_00064268_2014", "1", makeTS(2014, 7, 18)), + McoCeFbstcMedicalPractitionerClaim("2004100010", "390780146_00114237_2014", "22", makeTS(2014, 12, 12)) + ).toDS + + + // When + val result = McoCeFbstcSpecialtyExtractor.extract(sources, medicalSpeCodes.toSet) + + // Then + assertDSs(result, expected) + } + + "extract" should "extract all health care related services provided by medical practitioner in McoCe__Fcstc" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val medicalSpeCodes = List.empty + val input = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") + val sources = Sources(mcoCe = Some(input)) + + val expected = Seq[Event[PractitionerClaimSpeciality]]( + McoCeFcstcMedicalPractitionerClaim("2004100010", "390780146_00114237_2014", "1", makeTS(2014, 12, 12)), + McoCeFcstcMedicalPractitionerClaim("2004100010", "710780214_00000130_2014", "25", makeTS(2014, 4, 
15)), + McoCeFcstcMedicalPractitionerClaim("2004100010", "390780146_00026744_2014", "13", makeTS(2014, 4, 4)) + ).toDS + + + // When + val result = McoCeFcstcSpecialtyExtractor.extract(sources, medicalSpeCodes.toSet) + + // Then + assertDSs(result, expected) + } } From 23510a0f3a8a33bbbda6b7fd4e9513da6e4a371d Mon Sep 17 00:00:00 2001 From: Angel Francisco Orta Date: Wed, 4 Mar 2020 14:34:39 +0100 Subject: [PATCH 15/38] CNAM-441: Create markdown for transformer CNAM-441: Added information to better explain transformer behaviour CNAM-441: Change sentence to better comprenhension --- Transformer.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 Transformer.md diff --git a/Transformer.md b/Transformer.md new file mode 100644 index 00000000..d35f5749 --- /dev/null +++ b/Transformer.md @@ -0,0 +1,41 @@ +# Transformers + +A transformer is used to turn one or multiple datasets +into another one. A `Transformer` accept a configuration class as a parameter +a configuration class of type + [TransformerConfig](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/TransformerConfig.scala). +This configuration file control the behaviour of the `Transformer` through its methods and values. + + +`transform` is the main and the unique public available method of `Transformer`, this method accepts one or several datasets as input + and after the application of the transformation logic, returns a dataset of type `Event`([Events](Events.md)) with a new `category`. 
+ + If we take as an example [ExposureTransformer](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureTransformer.scala), + we pass an instance of the class [ExposuresTransformerConfig](https://github.com/X-DataInitiative/SCALPEL-Extraction/blob/master/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposuresTransformerConfig.scala) + as parameter, it extends a `TransformerConfig`. + ```scala +/** + * A tag to improve readability of subclasses and to allow type binding + */ + +trait TransformerConfig + ``` +The config class is used to control the Transformer's behaviour. + ```scala +class ExposuresTransformerConfig( + val exposurePeriodAdder: ExposurePeriodAdder) extends TransformerConfig with Serializable + ``` +This main method take as parameter two datasets of type `Event` and subtypes `FollowUp` and `Drug` respectively +and return a dataset of type `Event` and subtype `Exposure`. + +```scala + def transform(followUps: Dataset[Event[FollowUp]])(drugs: Dataset[Event[Drug]]): Dataset[Event[Exposure]] = { + drugs + .transform(config.exposurePeriodAdder.toExposure(followUps)) + .transform(regulateWithFollowUps(followUps)) + } + ``` +The objective of the `transform` method is thus to combine, filter, check relations between multiples `Event`s to form a new `Event`. +In the example of the `ExposureTransformer`, it combines multiples `Drug` `Event`s based on the logic of +the `exposurePeriodAdder` of the `ExposuresTransformerConfig` to form an `Exposure` while +making sure that it is contained within the period defined in the `FollowUp` `Event`. 
\ No newline at end of file From c0e1782786fd6402b9782b6f07c043b2f9581c0c Mon Sep 17 00:00:00 2001 From: Dian SUN Date: Tue, 18 Feb 2020 15:08:17 +0100 Subject: [PATCH 16/38] CNAM-442: emergency extractor --- .../cmap/cnam/etl/events/HospitalStay.scala | 4 + .../McoceEmergenciesExtractor.scala | 74 +++++++++++++++++++ .../cmap/cnam/study/bulk/BulkMain.scala | 4 +- .../PmsiHospitalStaysExtractor.scala | 14 ++++ .../McoceEmergenciesExtractorSuite.scala | 44 +++++++++++ 5 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PmsiHospitalStaysExtractor.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala index 6c23c1f6..9ae2f916 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala @@ -20,6 +20,10 @@ object McoHospitalStay extends HospitalStay { override val category: EventCategory[HospitalStay] = "mco_hospital_stay" } +object McoceEmergency extends HospitalStay { + override val category: EventCategory[HospitalStay] = "mco_ce_emergency" +} + /** Hospital Stay in the SSR PMSI are one type of hospital stays, see : * https://documentation-snds.health-data-hub.fr/glossaire/ssr.html */ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala new file mode 100644 index 00000000..4a1164f4 --- /dev/null +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala @@ -0,0 +1,74 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays + +import java.sql.{Date, Timestamp} +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.{DataFrame, Row} +import fr.polytechnique.cmap.cnam.etl.events.{Event, HospitalStay, McoceEmergency} +import fr.polytechnique.cmap.cnam.etl.extractors.Extractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +object McoceEmergenciesExtractor extends Extractor[HospitalStay] with McoceEmergenciesExtractor { + /** Allows to check if the Row from the Source is considered in the current Study. + * + * @param codes A set of codes being considered in the Study. + * @param row The row itself. + * @return A boolean value. + */ + override def isInStudy(codes: Set[String])(row: Row): Boolean = true + + /** Checks if the passed Row has the information needed to build the Event. + * + * @param row The row itself. + * @return A boolean value. + */ + override def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(ColNames.ActCode)) && row.getAs[String](ColNames.ActCode).startsWith("ATU") + + /** Builds the Event. + * + * @param row The row itself. + * @return An event object. + */ + override def builder(row: Row): Seq[Event[HospitalStay]] = { + val patientID = extractPatientId(row) + val groupId = extractGroupId(row) + val start = extractStart(row) + val end = extractEnd(row) + Seq(McoceEmergency(patientID, groupId, start, end)) + } + + /** Gets and prepares all the needed columns from the Source. + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A dataframe with mco columns. 
+ */ + override def getInput(sources: Sources): DataFrame = sources.mcoCe.get.select(ColNames.all.map(col): _*) +} + +trait McoceEmergenciesExtractor { + + final object ColNames extends Serializable { + final val PatientID: String = "NUM_ENQ" + final val EtaNum: String = "ETA_NUM" + final val SeqNum: String = "SEQ_NUM" + final val StartDate: String = "EXE_SOI_DTD" + final val EndDate: String = "EXE_SOI_DTF" + final val Year: String = "MCO_FBSTC__SOR_ANN" + final val ActCode: String = "MCO_FBSTC__ACT_COD" + final val all: List[String] = List(PatientID, EtaNum, SeqNum, Year, StartDate, EndDate, ActCode) + } + + def extractPatientId(r: Row): String = { + r.getAs[String](ColNames.PatientID) + } + + def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.EtaNum) + "_" + + r.getAs[String](ColNames.SeqNum) + "_" + + r.getAs[Int](ColNames.Year).toString + } + + def extractEnd(r: Row): Timestamp = new Timestamp(r.getAs[Date](ColNames.EndDate).getTime) + + def extractStart(r: Row): Timestamp = new Timestamp(r.getAs[Date](ColNames.StartDate).getTime) + +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala index 158966d2..9eee1709 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala @@ -10,10 +10,10 @@ import fr.polytechnique.cmap.cnam.etl.extractors.acts.{DcirMedicalActExtractor, import fr.polytechnique.cmap.cnam.etl.extractors.classifications.GhmExtractor import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.sources.Sources +import 
fr.polytechnique.cmap.cnam.study.bulk.extractors.PmsiHospitalStaysExtractor import fr.polytechnique.cmap.cnam.util.Path import fr.polytechnique.cmap.cnam.util.reporting.{MainMetadata, OperationMetadata, OperationReporter, OperationTypes} @@ -49,7 +49,7 @@ object BulkMain extends Main { drugs.unpersist() - val hospitalStays = McoHospitalStaysExtractor.extract(sources, Set.empty).cache() + val hospitalStays = PmsiHospitalStaysExtractor.extract(sources).cache() operationsMetadata += { OperationReporter .report( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PmsiHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PmsiHospitalStaysExtractor.scala new file mode 100644 index 00000000..496645db --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PmsiHospitalStaysExtractor.scala @@ -0,0 +1,14 @@ +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import org.apache.spark.sql.Dataset +import fr.polytechnique.cmap.cnam.etl.events.{Event, HospitalStay} +import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.{McoHospitalStaysExtractor, McoceEmergenciesExtractor} +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +object PmsiHospitalStaysExtractor { + def extract(sources: Sources): Dataset[Event[HospitalStay]] = { + val mco = McoHospitalStaysExtractor.extract(sources, Set.empty[String]) + val mcoce = McoceEmergenciesExtractor.extract(sources, Set.empty[String]) + mco.union(mcoce) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala new file mode 100644 index 00000000..89f8ad7d --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala @@ -0,0 +1,44 @@ +package 
fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays + +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.events.McoceEmergency +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class McoceEmergenciesExtractorSuite extends SharedContext { + + "extract" should "return the hospital stays(emergencies) from mcoce sources" in { + //Given + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + val df = Seq( + ("20041", "830100525", "00030885", "2012", makeTS(2012, 4, 21), makeTS(2012, 4, 21), "ATU"), + ("20041", "830100525", "00032716", "2012", makeTS(2012, 4, 28), makeTS(2012, 4, 29), "ATU"), + ("20041", "830100525", "00032738", "2012", makeTS(2012, 4, 29), makeTS(2012, 4, 29), "ATU"), + ("20041", "830100525", "00032038", "2013", makeTS(2013, 4, 29), makeTS(2013, 4, 29), "FTN"), + ("200410", "190000059", "00044158", null, makeTS(2010, 3, 5), makeTS(2010, 3, 5), null), + ("200410", "190000059", "00027825", null, makeTS(2011, 5, 13), makeTS(2011, 5, 13), null), + ("200410", "190000059", "00020161", null, makeTS(2012, 4, 10), makeTS(2012, 4, 10), null), + ("200410", "190000059", "00022621", null, makeTS(2014, 4, 18), makeTS(2014, 5, 18), null), + ("2004838055", "680000395", "00018597", "2010", makeTS(2010, 7, 11), makeTS(2010, 7, 11), "ATU F"), + ("2006191920", "680000395", "00009656", "2013", makeTS(2013, 9, 24), makeTS(2013, 9, 24), "ATU N") + ).toDF("NUM_ENQ", "ETA_NUM", "SEQ_NUM", "MCO_FBSTC__SOR_ANN", "EXE_SOI_DTD", "EXE_SOI_DTF", "MCO_FBSTC__ACT_COD") + + val sources = Sources(mcoCe = Some(df)) + + val expected = Seq( + McoceEmergency("20041", "830100525_00032716_2012", makeTS(2012, 4, 28), makeTS(2012, 4, 29)), + McoceEmergency("20041", "830100525_00030885_2012", makeTS(2012, 4, 21), makeTS(2012, 4, 21)), + McoceEmergency("20041", "830100525_00032738_2012", makeTS(2012, 4, 29), makeTS(2012, 4, 29)), + McoceEmergency("2004838055", "680000395_00018597_2010", 
makeTS(2010, 7, 11), makeTS(2010, 7, 11)), + McoceEmergency("2006191920", "680000395_00009656_2013", makeTS(2013, 9, 24), makeTS(2013, 9, 24)) + ).toDS() + + val res = McoceEmergenciesExtractor.extract(sources, Set.empty[String]) + + assertDSs(expected, res) + + } + +} From 971b0e0971765a5c61e98d2e61ca3d4a2665fff1 Mon Sep 17 00:00:00 2001 From: Dian SUN Date: Tue, 25 Feb 2020 16:20:48 +0100 Subject: [PATCH 17/38] CNAM-442: exact date in medical act 1. a fake date for null start date in dcir 2. a day offset for start date in mco 3. bulk config for drug level --- src/main/resources/config/bulk/default.conf | 5 ++- .../acts/DcirMedicalActExtractor.scala | 12 +++++-- .../etl/extractors/acts/McoActExtractor.scala | 19 ++++++++-- .../cnam/etl/extractors/mco/McoSource.scala | 3 +- .../cmap/cnam/study/bulk/BulkConfig.scala | 19 +++++----- .../cnam/study/bulk/BulkConfigLoader.scala | 16 +++++++++ .../extractors/acts/McoMedicalActsSuite.scala | 36 +++++++++---------- 7 files changed, 77 insertions(+), 33 deletions(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfigLoader.scala diff --git a/src/main/resources/config/bulk/default.conf b/src/main/resources/config/bulk/default.conf index c2ebd5f8..7f65caa0 100644 --- a/src/main/resources/config/bulk/default.conf +++ b/src/main/resources/config/bulk/default.conf @@ -1,5 +1,8 @@ root { - + drugs { + level: "cip13" + families: [] + } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala index 8c642ee5..9e612c28 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala @@ -2,11 +2,13 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts +import java.sql.Timestamp import scala.util.Try import 
org.apache.spark.sql.{DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{BiologyDcirAct, DcirAct, EventBuilder, MedicalAct} import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.functions trait DcirActExtractor extends DcirExtractor[MedicalAct] { @@ -54,10 +56,15 @@ trait DcirActExtractor extends DcirExtractor[MedicalAct] { override def extractWeight(r: Row): Double = 1.0 + override def extractStart(r: Row): Timestamp = { + Try(super.extractStart(r)) recover { + case _: NullPointerException => functions.makeTS(1970, 1, 1) + } + }.get } -object DcirMedicalActExtractor extends DcirActExtractor { +object DcirMedicalActExtractor extends DcirActExtractor { override val columnName: String = ColNames.CamCode override val eventBuilder: EventBuilder = DcirAct @@ -68,9 +75,10 @@ object DcirMedicalActExtractor extends DcirActExtractor { } -object DcirBiologyActExtractor extends DcirActExtractor { +object DcirBiologyActExtractor extends DcirActExtractor { override val columnName: String = ColNames.BioCode override val eventBuilder: EventBuilder = BiologyDcirAct + override def code = (row: Row) => row.getAs[Double](columnName).toString override def getInput(sources: Sources): DataFrame = sources.dcir.get.select( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoActExtractor.scala index f9ca1098..b0ada5bc 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoActExtractor.scala @@ -2,15 +2,30 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts +import java.sql.Timestamp +import me.danielpes.spark.datetime.Period +import me.danielpes.spark.datetime.implicits.DateImplicits +import org.apache.spark.sql.Row import 
fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoExtractor -object McoCcamActExtractor extends McoExtractor[MedicalAct] { +trait McoActExtractor extends McoExtractor[MedicalAct] { + override def extractStart(r: Row): Timestamp = { + (r.getAs[Timestamp](NewColumns.EstimatedStayStart) + Period(days = getDateOffset(r))).get + } + + def getDateOffset(r: Row): Int = r.getAs[String](ColNames.CCAMDelayDate) match { + case null => 0 + case value: String => value.toInt + } +} + +object McoCcamActExtractor extends McoActExtractor { final override val columnName: String = ColNames.CCAM override val eventBuilder: EventBuilder = McoCCAMAct } -object McoCimMedicalActExtractor extends McoExtractor[MedicalAct] { +object McoCimMedicalActExtractor extends McoActExtractor { final override val columnName: String = ColNames.DP override val eventBuilder: EventBuilder = McoCIM10Act } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala index 38e4da09..de886ec4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala @@ -28,9 +28,10 @@ trait McoSource extends ColumnNames { val StayEndDate: ColName = "SOR_DAT" val StartDate: ColName = "EXE_SOI_DTD" val EndDate: ColName = "EXE_SOI_DTF" + val CCAMDelayDate: ColName = "MCO_A__ENT_DAT_DEL" val all = List( PatientID, DP, DR, DA, CCAM, GHM, EtaNum, RsaNum, Year, ExitMode, StayEndMonth, StayEndYear, StayLength, - StayStartDate, StayEndDate, StartDate, EndDate + StayStartDate, StayEndDate, StartDate, EndDate, CCAMDelayDate ) val hospitalStayPart = List( PatientID, EtaNum, RsaNum, Year, StartDate, EndDate diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala index bc876f39..fab6df79 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala @@ -4,19 +4,20 @@ package fr.polytechnique.cmap.cnam.study.bulk import java.time.LocalDate import pureconfig.generic.auto._ +import fr.polytechnique.cmap.cnam.etl.config.BaseConfig import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig -import fr.polytechnique.cmap.cnam.etl.config.{BaseConfig, ConfigLoader} import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugConfig -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.Cip13Level +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.{Cip13Level, DrugClassificationLevel} case class BulkConfig( input: StudyConfig.InputPaths, - output: StudyConfig.OutputPaths) extends StudyConfig { - val drugs: DrugConfig = BulkConfig.DrugsConfig + output: StudyConfig.OutputPaths, + drugs: DrugConfig = BulkConfig.DrugsConfig()) extends StudyConfig { val base: BaseConfig = BulkConfig.BaseConfig } -object BulkConfig extends ConfigLoader { +object BulkConfig extends BulkConfigLoader { def load(path: String, env: String): BulkConfig = { val defaultPath = "config/bulk/default.conf" @@ -29,9 +30,9 @@ object BulkConfig extends ConfigLoader { studyEnd = LocalDate.of(2015, 1, 1) ) - final object DrugsConfig extends DrugConfig( - level = Cip13Level, - families = List.empty - ) + final case class DrugsConfig( + override val level: DrugClassificationLevel = Cip13Level, + override val families: List[DrugClassConfig] = List.empty + ) extends DrugConfig(level = level, families = families) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfigLoader.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfigLoader.scala new file mode 100644 index 
00000000..28d48c42 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfigLoader.scala @@ -0,0 +1,16 @@ +package fr.polytechnique.cmap.cnam.study.bulk + +import pureconfig.ConfigReader +import fr.polytechnique.cmap.cnam.etl.config.ConfigLoader +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.study.fall.config.FallDrugClassConfig + +class BulkConfigLoader extends ConfigLoader { + + //For reading DrugConfigClasses that are related to the Fall study + implicit val drugConfigReader: ConfigReader[DrugClassConfig] = ConfigReader[String].map( + family => + FallDrugClassConfig.familyFromString(family) + ) + +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala index 20aab4ac..0bd1ff89 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala @@ -17,9 +17,9 @@ class McoMedicalActsSuite extends SharedContext { val cim10Codes = Set("C670") val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val expected = Seq[Event[MedicalAct]]( - McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 29)), - McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 29)), - McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 8)) + McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 31)), + McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 31)), + McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 10)) ).toDS val input = Sources(mco = Some(mco)) @@ -37,12 +37,12 @@ class McoMedicalActsSuite extends SharedContext { // Given val mco = 
spark.read.parquet("src/test/resources/test-input/MCO.parquet") val expected = Seq[Event[MedicalAct]]( - McoCIM10Act("Patient_02", "10000123_10000543_2006", "C671", makeTS(2005, 12, 24)), - McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 29)), - McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 29)), - McoCIM10Act("Patient_02", "10000123_20000345_2007", "C671", makeTS(2007, 1, 29)), - McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 8)), - McoCIM10Act("Patient_02", "10000123_30000852_2008", "C671", makeTS(2008, 3, 15)) + McoCIM10Act("Patient_02", "10000123_10000543_2006", "C671", makeTS(2005, 12, 26)), + McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 31)), + McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 31)), + McoCIM10Act("Patient_02", "10000123_20000345_2007", "C671", makeTS(2007, 1, 31)), + McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 10)), + McoCIM10Act("Patient_02", "10000123_30000852_2008", "C671", makeTS(2008, 3, 17)) ).toDS val input = Sources(mco = Some(mco)) @@ -61,9 +61,9 @@ class McoMedicalActsSuite extends SharedContext { val ccamCodes = Set("AAAA123") val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val expected = Seq[Event[MedicalAct]]( - McoCCAMAct("Patient_02", "10000123_10000987_2006", "AAAA123", makeTS(2005, 12, 29)), - McoCCAMAct("Patient_02", "10000123_20000123_2007", "AAAA123", makeTS(2007, 1, 29)), - McoCCAMAct("Patient_02", "10000123_30000546_2008", "AAAA123", makeTS(2008, 3, 8)) + McoCCAMAct("Patient_02", "10000123_10000987_2006", "AAAA123", makeTS(2005, 12, 31)), + McoCCAMAct("Patient_02", "10000123_20000123_2007", "AAAA123", makeTS(2007, 1, 31)), + McoCCAMAct("Patient_02", "10000123_30000546_2008", "AAAA123", makeTS(2008, 3, 10)) ).toDS val input = Sources(mco = Some(mco)) @@ -81,12 +81,12 @@ class McoMedicalActsSuite extends SharedContext { // 
Given val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val expected = Seq[Event[MedicalAct]]( - McoCCAMAct("Patient_02", "10000123_10000987_2006", "AAAA123", makeTS(2005, 12, 29)), - McoCCAMAct("Patient_02", "10000123_20000123_2007", "AAAA123", makeTS(2007, 1, 29)), - McoCCAMAct("Patient_02", "10000123_30000546_2008", "AAAA123", makeTS(2008, 3, 8)), - McoCCAMAct("Patient_02", "10000123_20000345_2007", "BBBB123", makeTS(2007, 1, 29)), - McoCCAMAct("Patient_02", "10000123_10000543_2006", "BBBB123", makeTS(2005, 12, 24)), - McoCCAMAct("Patient_02", "10000123_30000852_2008", "BBBB123", makeTS(2008, 3, 15)) + McoCCAMAct("Patient_02", "10000123_10000987_2006", "AAAA123", makeTS(2005, 12, 31)), + McoCCAMAct("Patient_02", "10000123_20000123_2007", "AAAA123", makeTS(2007, 1, 31)), + McoCCAMAct("Patient_02", "10000123_30000546_2008", "AAAA123", makeTS(2008, 3, 10)), + McoCCAMAct("Patient_02", "10000123_20000345_2007", "BBBB123", makeTS(2007, 1, 31)), + McoCCAMAct("Patient_02", "10000123_10000543_2006", "BBBB123", makeTS(2005, 12, 26)), + McoCCAMAct("Patient_02", "10000123_30000852_2008", "BBBB123", makeTS(2008, 3, 17)) ).toDS val input = Sources(mco = Some(mco)) From 7c30096823a64b37c0c3c41f01d46214d62241fd Mon Sep 17 00:00:00 2001 From: Dian SUN Date: Wed, 26 Feb 2020 14:29:15 +0100 Subject: [PATCH 18/38] CNAM-442: extract weight from hospitalisation --- .../cmap/cnam/etl/events/HospitalStay.scala | 19 ++++++++++----- .../McoHospitalStaysExtractor.scala | 24 +++++++++++++++++++ .../cnam/etl/extractors/mco/McoSource.scala | 4 +++- .../McoHospitalStayExtractorSuite.scala | 24 +++++++++---------- 4 files changed, 52 insertions(+), 19 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala index 9ae2f916..84c08182 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/HospitalStay.scala @@ -3,8 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.events.Event.Columns._ trait HospitalStay extends AnyEvent with EventBuilder { @@ -12,6 +10,15 @@ trait HospitalStay extends AnyEvent with EventBuilder { def apply(patientID: String, hospitalID: String, start: Timestamp, end: Timestamp): Event[HospitalStay] = apply(patientID, hospitalID, hospitalID, 0D, start, Some(end)) + + def apply( + patientID: String, + hospitalID: String, + weight: Double, + start: Timestamp, + end: Timestamp): Event[HospitalStay] = + apply(patientID, hospitalID, hospitalID, weight, start, Some(end)) + } object HospitalStay extends HospitalStay @@ -25,15 +32,15 @@ object McoceEmergency extends HospitalStay { } /** Hospital Stay in the SSR PMSI are one type of hospital stays, see : - * https://documentation-snds.health-data-hub.fr/glossaire/ssr.html - */ + * https://documentation-snds.health-data-hub.fr/glossaire/ssr.html + */ object SsrHospitalStay extends HospitalStay { override val category: EventCategory[HospitalStay] = "ssr_hospital_stay" } /** HAD Hospital Stay in the HAD PMSI are one type of hospital stays, see : - * https://documentation-snds.health-data-hub.fr/glossaire/had.html - */ + * https://documentation-snds.health-data-hub.fr/glossaire/had.html + */ object HadHospitalStay extends HospitalStay { override val category: EventCategory[HospitalStay] = "had_hospital_stay" } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala index 468f528c..776c88f2 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala @@ -1,6 +1,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays import java.sql.{Date, Timestamp} +import scala.util.Try import org.apache.spark.sql.functions.col import org.apache.spark.sql.{DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, McoHospitalStay} @@ -19,5 +20,28 @@ object McoHospitalStaysExtractor extends McoExtractor[HospitalStay] { override def code: Row => String = extractGroupId + override def extractWeight(r: Row): Double = { + getFromValue(r) flatMap (from => getFromType(r) map (fromType => from + fromType * 0.1)) recover { case _ => -1D } get + } + + private def getFromValue(r: Row): Try[Double] = { + Try { + r.getAs[String](ColNames.StayFrom).toDouble + } + } + + private def getFromType(r: Row): Try[Double] = { + + val isNull = (s: String) => s == null || s.trim.isEmpty + + Try { + r.getAs[String](ColNames.StayFromType) match { + case value if isNull(value) => 0D + case "R" => 8D + case value => value.toDouble + } + } + } + override def getInput(sources: Sources): DataFrame = sources.mco.get.select(ColNames.hospitalStayPart.map(col): _*) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala index de886ec4..8cd103d0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala @@ -29,12 +29,14 @@ trait McoSource extends ColumnNames { val StartDate: ColName = "EXE_SOI_DTD" val EndDate: ColName = "EXE_SOI_DTF" val CCAMDelayDate: ColName = "MCO_A__ENT_DAT_DEL" + val StayFrom: ColName = "MCO_B__ENT_MOD" + val StayFromType: ColName = "MCO_B__ENT_PRV" val all = List( PatientID, DP, DR, DA, CCAM, GHM, EtaNum, RsaNum, Year, ExitMode, StayEndMonth, StayEndYear, StayLength, 
StayStartDate, StayEndDate, StartDate, EndDate, CCAMDelayDate ) val hospitalStayPart = List( - PatientID, EtaNum, RsaNum, Year, StartDate, EndDate + PatientID, EtaNum, RsaNum, Year, StartDate, EndDate, StayFrom, StayFromType ) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala index c861f633..bfa06da3 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala @@ -19,12 +19,12 @@ class McoHospitalStayExtractorSuite extends SharedContext { val sources = Sources(mco = Some(mco)) val expected: Dataset[Event[HospitalStay]] = Seq( - McoHospitalStay("Patient_02", "10000123_20000123_2007", makeTS(2007, 2, 1), makeTS(2007, 2, 10)), - McoHospitalStay("Patient_02", "10000123_20000345_2007", makeTS(2007, 2, 1), makeTS(2007, 2, 10)), - McoHospitalStay("Patient_02", "10000123_30000546_2008", makeTS(2008, 3, 1), makeTS(2008, 3, 10)), - McoHospitalStay("Patient_02", "10000123_30000852_2008", makeTS(2008, 3, 1), makeTS(2008, 3, 10)), - McoHospitalStay("Patient_02", "10000123_10000987_2006", makeTS(2006, 1, 1), makeTS(2006, 1, 10)), - McoHospitalStay("Patient_02", "10000123_10000543_2006", makeTS(2006, 1, 1), makeTS(2006, 1, 10)) + McoHospitalStay("Patient_02", "10000123_20000123_2007", 8.0D, makeTS(2007, 2, 1), makeTS(2007, 2, 10)), + McoHospitalStay("Patient_02", "10000123_20000345_2007", 8.0D, makeTS(2007, 2, 1), makeTS(2007, 2, 10)), + McoHospitalStay("Patient_02", "10000123_30000546_2008", 8.0D, makeTS(2008, 3, 1), makeTS(2008, 3, 10)), + McoHospitalStay("Patient_02", "10000123_30000852_2008", 8.0D, makeTS(2008, 3, 1), makeTS(2008, 3, 10)), + McoHospitalStay("Patient_02", "10000123_10000987_2006", 8.0D, makeTS(2006, 1, 1), 
makeTS(2006, 1, 10)), + McoHospitalStay("Patient_02", "10000123_10000543_2006", -1.0D, makeTS(2006, 1, 1), makeTS(2006, 1, 10)) ).toDS() //When @@ -43,12 +43,12 @@ class McoHospitalStayExtractorSuite extends SharedContext { val sources = Sources(mco = Some(mco)) val expected: Dataset[Event[HospitalStay]] = Seq( - McoHospitalStay("Patient_02", "10000123_20000123_2007", makeTS(2007, 2, 1), makeTS(2007, 2, 10)), - McoHospitalStay("Patient_02", "10000123_20000345_2007", makeTS(2007, 2, 1), makeTS(2007, 2, 10)), - McoHospitalStay("Patient_02", "10000123_30000546_2008", makeTS(2008, 3, 1), makeTS(2008, 3, 10)), - McoHospitalStay("Patient_02", "10000123_30000852_2008", makeTS(2008, 3, 1), makeTS(2008, 3, 10)), - McoHospitalStay("Patient_02", "10000123_10000987_2006", makeTS(2006, 1, 1), makeTS(2006, 1, 10)), - McoHospitalStay("Patient_02", "10000123_10000543_2006", makeTS(2006, 1, 1), makeTS(2006, 1, 10)) + McoHospitalStay("Patient_02", "10000123_20000123_2007", 8.0D, makeTS(2007, 2, 1), makeTS(2007, 2, 10)), + McoHospitalStay("Patient_02", "10000123_20000345_2007", 8.0D, makeTS(2007, 2, 1), makeTS(2007, 2, 10)), + McoHospitalStay("Patient_02", "10000123_30000546_2008", 8.0D, makeTS(2008, 3, 1), makeTS(2008, 3, 10)), + McoHospitalStay("Patient_02", "10000123_30000852_2008", 8.0D, makeTS(2008, 3, 1), makeTS(2008, 3, 10)), + McoHospitalStay("Patient_02", "10000123_10000987_2006", 8.0D, makeTS(2006, 1, 1), makeTS(2006, 1, 10)), + McoHospitalStay("Patient_02", "10000123_10000543_2006", -1.0D, makeTS(2006, 1, 1), makeTS(2006, 1, 10)) ).toDS() //When From 1a3b65c29a409e84d27be125e673dcf4705b2959 Mon Sep 17 00:00:00 2001 From: Dian SUN Date: Wed, 4 Mar 2020 16:14:50 +0100 Subject: [PATCH 19/38] CNAM-442: test unit --- .../acts/DcirMedicalActExtractor.scala | 2 +- .../acts/DcirMedicalActsSuite.scala | 8 +++-- .../McoHospitalStayExtractorSuite.scala | 31 +++++++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala index 9e612c28..1edd9786 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala @@ -58,7 +58,7 @@ trait DcirActExtractor extends DcirExtractor[MedicalAct] { override def extractStart(r: Row): Timestamp = { Try(super.extractStart(r)) recover { - case _: NullPointerException => functions.makeTS(1970, 1, 1) + case _ => functions.makeTS(1970, 1, 1) } }.get } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala index d2e1c8bf..082de8b7 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala @@ -218,14 +218,15 @@ class DcirMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val codes = Set("AAAA", "CCCC") + val codes = Set("AAAA", "CCCC", "DDDD") val input = Seq( ("Patient_A", "AAAA", "NABM1", makeTS(2010, 1, 1), None, None, None), ("Patient_A", "BBBB", "NABM1", makeTS(2010, 2, 1), Some(1D), Some(0D), Some(1D)), ("Patient_B", "CCCC", "NABM1", makeTS(2010, 3, 1), None, None, None), ("Patient_B", "CCCC", "NABM1", makeTS(2010, 4, 1), Some(7D), Some(0D), Some(2D)), - ("Patient_C", "BBBB", "NABM1", makeTS(2010, 5, 1), Some(1D), Some(0D), Some(2D)) + ("Patient_C", "BBBB", "NABM1", makeTS(2010, 5, 1), Some(1D), Some(0D), Some(2D)), + ("Patient_D", "DDDD", "NABM1", null, None, None, None) ).toDF( ColNames.PatientID, ColNames.CamCode, ColNames.BioCode, ColNames.Date, ColNames.InstitutionCode, ColNames.GHSCode, ColNames.Sector @@ -236,7 +237,8 @@ 
class DcirMedicalActsSuite extends SharedContext { val expected = Seq[Event[MedicalAct]]( DcirAct("Patient_A", DcirAct.groupID.Liberal, "AAAA", 1.0, makeTS(2010, 1, 1)), DcirAct("Patient_B", DcirAct.groupID.Liberal, "CCCC", 1.0, makeTS(2010, 3, 1)), - DcirAct("Patient_B", DcirAct.groupID.PrivateAmbulatory, "CCCC", 1.0, makeTS(2010, 4, 1)) + DcirAct("Patient_B", DcirAct.groupID.PrivateAmbulatory, "CCCC", 1.0, makeTS(2010, 4, 1)), + DcirAct("Patient_D", DcirAct.groupID.Liberal, "DDDD", 1.0, makeTS(1970, 1, 1)) ).toDS // When diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala index bfa06da3..83a052ef 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala @@ -58,4 +58,35 @@ class McoHospitalStayExtractorSuite extends SharedContext { assertDSs(expected, result) } + "extract" should "calculate correct weight from mco sources" in { + //Given + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + val df = Seq( + ("Patient_02", "10000123", "20000123", "2007", makeTS(2007, 1, 1), makeTS(2007, 1, 10), "8", "5"), + ("Patient_02", "10000123", "20000345", "2007", makeTS(2007, 2, 1), makeTS(2007, 2, 10), "8", "5"), + ("Patient_02", "10000123", "20000546", "2007", makeTS(2007, 3, 1), makeTS(2007, 3, 10), "8", "R"), + ("Patient_02", "10000123", "20000852", "2007", makeTS(2007, 4, 1), makeTS(2007, 4, 10), "8", null), + ("Patient_02", "10000123", "20000987", "2007", makeTS(2007, 5, 1), makeTS(2007, 5, 10), null, null) + ).toDF("NUM_ENQ", "ETA_NUM", "RSA_NUM", "SOR_ANN", "EXE_SOI_DTD", "EXE_SOI_DTF", "MCO_B__ENT_MOD", "MCO_B__ENT_PRV") + + val sources = Sources(mco = Some(df)) + + val expected: Dataset[Event[HospitalStay]] = Seq( + 
McoHospitalStay("Patient_02", "10000123_20000123_2007", 8.5D, makeTS(2007, 1, 1), makeTS(2007, 1, 10)), + McoHospitalStay("Patient_02", "10000123_20000345_2007", 8.5D, makeTS(2007, 2, 1), makeTS(2007, 2, 10)), + McoHospitalStay("Patient_02", "10000123_20000546_2007", 8.8D, makeTS(2007, 3, 1), makeTS(2007, 3, 10)), + McoHospitalStay("Patient_02", "10000123_20000852_2007", 8.0D, makeTS(2007, 4, 1), makeTS(2007, 4, 10)), + McoHospitalStay("Patient_02", "10000123_20000987_2007", -1.0D, makeTS(2007, 5, 1), makeTS(2007, 5, 10)) + ).toDS() + + //When + val result: Dataset[Event[HospitalStay]] = McoHospitalStaysExtractor.extract(sources, Set.empty[String]) + + //Then + assertDSs(expected, result, true) + + } + } From 65da6176b06508f9bbdc7fc33d19b205ae080915 Mon Sep 17 00:00:00 2001 From: thomashdh Date: Mon, 17 Feb 2020 10:41:05 +0100 Subject: [PATCH 20/38] DREES-108 : Adding new branch DREES-108-Sources --- .../cnam/etl/extractors/ssr/SsrSource.scala | 22 ++++---- .../cmap/cnam/etl/sources/Sources.scala | 20 +++++++- .../etl/sources/data/DataSourceManager.scala | 3 +- .../cnam/etl/sources/data/DcirFilters.scala | 4 ++ .../cnam/etl/sources/data/DcirSource.scala | 6 +++ .../etl/sources/data/DoublonFinessPmsi.scala | 39 ++++++++++++++ .../cnam/etl/sources/data/HadFilters.scala | 17 +++++-- .../cnam/etl/sources/data/HadSource.scala | 22 ++------ .../cnam/etl/sources/data/McoFilters.scala | 39 +++++++++----- .../cnam/etl/sources/data/SsrCeSource.scala | 1 + .../cnam/etl/sources/data/SsrFilters.scala | 20 +++++++- .../cnam/etl/sources/data/SsrSource.scala | 51 ++++++------------- .../cnam/etl/sources/value/IrNatSource.scala | 1 + 13 files changed, 158 insertions(+), 87 deletions(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DoublonFinessPmsi.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala index 21b1255f..5c4fd4b9 
100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala @@ -9,26 +9,26 @@ import fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp trait SsrSource extends ColumnNames { final object ColNames extends Serializable { - val PatientID: ColName = "SSR_C__NUM_ENQ" - val DP: ColName = "MOR_PRP" - val DR: ColName = "ETL_AFF" + val PatientID: ColName = "NUM_ENQ" + val DP: ColName = "SSR_B__MOR_PRP" + val DR: ColName = "SSR_B__ETL_AFF" val DA: ColName = "SSR_D__DGN_COD" val CCAM: ColName = "SSR_CCAM__CCAM_ACT" // present only in 2014-2015-2016, should be addeed for the studies on the echantillon val CSARR: ColName = "SSR_CSARR__CSARR_COD" - val FP_PEC: ColName = "FP_PEC" + val FP_PEC: ColName = "SSR_B__FP_PEC" // MOI_ANN_SOR_SEJ ? //val GHM: ColName = "SSR_B__GRG_GHM" -> GME TODO val EtaNum: ColName = "ETA_NUM" val RhaNum: ColName = "RHA_NUM" val RhsNum: ColName = "RHS_NUM" val Year: ColName = "year" - val StayStartMonth: ColName = "SSR_C__MOI_LUN_1S" - val StayStartYear: ColName = "SSR_C__ANN_LUN_1S" - val StayLength: ColName = "RHS_ANT_SEJ_ENT" - val StayStartDate: ColName = "SSR_C__ENT_DAT" - val StayEndDate: ColName = "SSR_C__SOR_DAT" - val StartDate: ColName = "SSR_C__EXE_SOI_DTD" - val EndDate: ColName = "SSR_C__EXE_SOI_DTF" + val StayStartMonth: ColName = "MOI_LUN_1S" + val StayStartYear: ColName = "ANN_LUN_1S" + val StayLength: ColName = "SSR_B__RHS_ANT_SEJ_ENT" + val StayStartDate: ColName = "ENT_DAT" + val StayEndDate: ColName = "SOR_DAT" + val StartDate: ColName = "EXE_SOI_DTD" + val EndDate: ColName = "EXE_SOI_DTF" val all = List( PatientID, DP, DR, DA, CCAM, CSARR, FP_PEC, EtaNum, RhaNum, RhsNum, StayLength, //CSARR, StayStartDate, StayEndDate, StartDate, EndDate, Year, StayStartMonth, StayStartYear diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala index 720b3f91..0254c1f8 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala @@ -3,6 +3,7 @@ package fr.polytechnique.cmap.cnam.etl.sources import java.sql.Timestamp + import org.apache.spark.sql.{DataFrame, SQLContext} import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig.InputPaths import fr.polytechnique.cmap.cnam.etl.sources.data.{DcirSource, HadSource, McoCeSource, McoSource, SsrCeSource, SsrSource} @@ -22,7 +23,11 @@ case class Sources( dosages: Option[DataFrame] = None) object Sources { - + /** Sanitize all sources with usual filters for snds analysis. + * + * @param sources An instance containing all available SNDS data and value tables. + * @return + */ def sanitize(sources: Sources): Sources = { sources.copy( dcir = sources.dcir.map(DcirSource.sanitize), @@ -39,6 +44,13 @@ object Sources { ) } + /** Filter sources to keep only data concerning the study period. + * + * @param sources An instance containing all available SNDS data and value tables. + * @param studyStart + * @param studyEnd + * @return + */ def sanitizeDates(sources: Sources, studyStart: Timestamp, studyEnd: Timestamp): Sources = { sources.copy( dcir = sources.dcir.map(DcirSource.sanitizeDates(_, studyStart, studyEnd)), @@ -55,6 +67,12 @@ object Sources { ) } + /** Read all source dataframe. 
+ * + * @param sqlContext + * @param paths + * @return + */ def read(sqlContext: SQLContext, paths: InputPaths): Sources = { this.read( sqlContext, diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DataSourceManager.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DataSourceManager.scala index 86c97ce2..36ec62fd 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DataSourceManager.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DataSourceManager.scala @@ -11,8 +11,7 @@ trait DataSourceManager extends SourceManager { val EXE_SOI_DTD: Column = col("EXE_SOI_DTD") - /** - * This method santize the sources based on the passed dates. + /** Sanitize the sources based on the passed dates. * * @param sourceData the data source that will be sanitized * @param studyStart the study start date diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirFilters.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirFilters.scala index 547c2b44..a88f417f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirFilters.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirFilters.scala @@ -5,6 +5,10 @@ package fr.polytechnique.cmap.cnam.etl.sources.data import org.apache.spark.sql.DataFrame private[data] class DcirFilters(rawDcir: DataFrame) { + /** Remove lines for information only. + * + * @return dataframe with lines corresponding to some real interaction with the healthcare system. 
+ */ def filterInformationFlux: DataFrame = { rawDcir.where(DcirSource.DPN_QLF =!= 71) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirSource.scala index ee6393c8..1b6440b2 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirSource.scala @@ -12,6 +12,12 @@ object DcirSource extends DataSourceManager with DcirSourceSanitizer { val BEN_CDI_NIR: Column = col("BEN_CDI_NIR") val DPN_QLF: Column = col("DPN_QLF") + /** Sanitize the dcir with usual filter for analysis + * - remove the lines without a proper *Nature de la prestation* + * - remove the lines for information + * @param dcir + * @return a new instance of the Source, with the sanitized data + */ override def sanitize(dcir: DataFrame): DataFrame = { dcir.where(DcirSource.BSE_PRS_NAT =!= 0) .filterInformationFlux diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DoublonFinessPmsi.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DoublonFinessPmsi.scala new file mode 100644 index 00000000..855f762f --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DoublonFinessPmsi.scala @@ -0,0 +1,39 @@ +package fr.polytechnique.cmap.cnam.etl.sources.data + +object DoublonFinessPmsi { + /** List of geographic FINESS for APHP, HCL and APHM (duplicates for this information also go back through legal FINESS). + * This list is detailed on the [snds documentation](https://documentation-snds.health-data-hub.fr/fiches/depenses_hopital_public.html#valorisation-des-sejours-a-l-hopital-public) + * and recommended by people building the "Reste à Charge" Database on the SNDS. It is more exhaustive than the precedent list.
But it should + */ + val specialHospitalCodes = List( + //APHP + "600100093","600100101","620100016","640790150","640797098","750100018","750806226", + "750100356","750802845","750801524","750100067","750100075","750100042","750805228", + "750018939","750018988","750100091","750100083","750100109","750833345","750019069", + "750803306","750019028","750100125","750801441","750019119","750100166","750100141", + "750100182","750100315","750019648","750830945","750008344","750803199","750803447", + "750100216","750100208","750833337","750000358","750019168","750809576","750100299", + "750041543","750100232","750802258","750803058","750803454","750100273","750801797", + "750803371","830100012","830009809","910100015","910100031","910100023","910005529", + "920100013","920008059","920100021","920008109","920100039","920100047","920812930", + "920008158","920100054","920008208","920100062","920712551","920000122","930100052", + "930100037","930018684","930812334","930811294","930100045","930011408","930811237", + "930100011","940018021","940100027","940100019","940170087","940005739","940100076", + "940100035","940802291","940100043","940019144","940005788","940100050","940802317", + "940100068","940005838","950100024","950100016", + //APHM + "130808231","130809775","130782931", + "130806003","130783293","130804305","130790330","130804297","130783236","130796873", + "130808520","130799695","130802085","130808256","130806052","130808538","130802101", + "130796550","130014558","130784234","130035884","130784259","130796279","130792856", + "130017239","130792534","130793698","130792898","130808546","130789175","130780521", + "130033996","130018229", + //HCL + "90787460","690007422","690007539","690784186","690787429", + "690783063","690007364","690787452","690007406","690787486","690784210","690799416", + "690784137","690007281","690799366","690784202","690023072","690787577","690784194", + "690007380","690784129","690029194","690806054","690029210","690787767","690784178", 
+ "690783154","690799358","690787817","690787742","690784152","690784145","690783121", + "690787478","690007455","690787494","830100558","830213484" + ) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala index 2b0d59f0..e547e8bb 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala @@ -1,13 +1,13 @@ package fr.polytechnique.cmap.cnam.etl.sources.data +import fr.polytechnique.cmap.cnam.etl.sources.data.DoublonFinessPmsi.specialHospitalCodes import org.apache.spark.sql.{Column, DataFrame} private[data] class HadFilters(rawHad: DataFrame) { - - /** Removing return codes which significate error in the PMSI - * - * This is a classic filter for all PMSI products. Other filters may be implemented in the future. - * */ + /** Filter out Had corrupted stays as returned by the ATIH. + * + * @return + */ def filterHadCorruptedHospitalStays: DataFrame = { val fictionalAndFalseHospitalStaysFilter: Column = HadSource .NIR_RET === "0" and HadSource.SEJ_RET === "0" and HadSource @@ -17,5 +17,12 @@ private[data] class HadFilters(rawHad: DataFrame) { rawHad.filter(fictionalAndFalseHospitalStaysFilter) } + /** Remove geographic finess doublons from APHP, APHM and HCL. 
+ * + * @return + */ + def filterSpecialHospitals: DataFrame = { + rawHad.where(!HadSource.ETA_NUM_EPMSI.isin(specialHospitalCodes: _*)) + } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSource.scala index c0af76ef..85b90dde 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSource.scala @@ -4,33 +4,20 @@ import org.apache.spark.sql.functions.{col, to_date, year} import org.apache.spark.sql.{Column, DataFrame, SQLContext} /** - * Extractor class for the HAD table - * - * + * Extractor class for the HAD table + * This filtering is explained here + * https://datainitiative.atlassian.net/wiki/pages/viewpage.action?pageId=40304642 */ object HadSource extends DataSourceManager with HadSourceSanitizer { - // unused for filtering -// val ETA_NUM_EPMSI: Column = col("ETA_NUM_EPMSI") -// val RHAD_NUM: Column = col("RHAD_NUM") -// val DP: Column = col("HAD_B__DGN_PAL") -// val PEC_PAL: Column = col("HAD_B_PEC_PAL") -// val PEC_ASS: Column = col("HAD_B_PEC_ASS") -// val DA: Column = col("HAD_D__DGN_ASS") -// val CCAM: Column = col("HAD_A__CCAM_COD") - + val ETA_NUM_EPMSI: Column = col("ETA_NUM_EPMSI") val NIR_RET: Column = col("NIR_RET") val SEJ_RET: Column = col("SEJ_RET") val FHO_RET: Column = col("FHO_RET") val PMS_RET: Column = col("PMS_RET") val DAT_RET: Column = col("DAT_RET") - -// val ENT_DAT: Column = col("ENT_DAT") -// val SOR_DAT: Column = col("SOR_DAT") val Year: Column = col("year") - //val foreignKeys: List[String] = List("ETA_NUM_EPMSI", "RHA_NUM", "year") - override def sanitize(rawHad: DataFrame): DataFrame = { /** * This filtering is explained here @@ -38,5 +25,6 @@ object HadSource extends DataSourceManager with HadSourceSanitizer { */ rawHad .filterHadCorruptedHospitalStays + .filterSpecialHospitals } } diff --git
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/McoFilters.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/McoFilters.scala index 9ecec783..9ae8a425 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/McoFilters.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/McoFilters.scala @@ -3,13 +3,21 @@ package fr.polytechnique.cmap.cnam.etl.sources.data import org.apache.spark.sql.{Column, DataFrame} +import fr.polytechnique.cmap.cnam.etl.sources.data.DoublonFinessPmsi.specialHospitalCodes private[data] class McoFilters(rawMco: DataFrame) { - + /** Filter out Finess doublons from APHP, APHM and HCL + * + * @return + */ def filterSpecialHospitals: DataFrame = { - rawMco.where(!McoSource.ETA_NUM.isin(McoFilters.specialHospitalCodes: _*)) + rawMco.where(!McoSource.ETA_NUM.isin(specialHospitalCodes: _*)) } + /** Filter out shared stays (between hospitals). + * + * @return + */ def filterSharedHospitalStays: DataFrame = { val duplicateHospitalsFilter: Column = McoSource.SEJ_TYP.isNull or McoSource .SEJ_TYP =!= "B" or (McoSource.GRG_GHM.like("28%") and !McoSource.GRG_GHM @@ -17,14 +25,26 @@ private[data] class McoFilters(rawMco: DataFrame) { rawMco.filter(duplicateHospitalsFilter) } + /** Filter out induced abortion (IVG). + * + * @return + */ def filterIVG: DataFrame = { rawMco.filter(McoSource.GRG_GHM =!= "14Z08Z") } + /** Filter out non reimbursed stays. + * + * @return + */ def filterNonReimbursedStays: DataFrame = { rawMco.filter(McoSource.GHS_NUM =!= "9999") } + /** Filter out Mco corrupted stays as returned by the ATIH. 
+ * + * @return + */ def filterMcoCorruptedHospitalStays: DataFrame = { val fictionalAndFalseHospitalStaysFilter: Column = !McoSource.GRG_GHM.like("90%") and McoSource .NIR_RET === "0" and McoSource.SEJ_RET === "0" and McoSource @@ -33,6 +53,10 @@ private[data] class McoFilters(rawMco: DataFrame) { rawMco.filter(fictionalAndFalseHospitalStaysFilter) } + /** Filter out McoCe corrupted stays as returned by the ATIH. + * + * @return + */ def filterMcoCeCorruptedHospitalStays: DataFrame = { val fictionalAndFalseHospitalStaysFilter: Column = McoCeSource.NIR_RET === "0" and McoCeSource .NAI_RET === "0" and McoCeSource.SEX_RET === "0" and McoCeSource @@ -43,17 +67,6 @@ private[data] class McoFilters(rawMco: DataFrame) { } private[data] object McoFilters { - - val specialHospitalCodes = List( - "130780521", "130783236", "130783293", "130784234", "130804297", "600100101", "690783154", - "690784137", "690784152", "690784178", "690787478", "750041543", "750100018", "750100042", - "750100075", "750100083", "750100091", "750100109", "750100125", "750100166", "750100208", - "750100216", "750100232", "750100273", "750100299", "750801441", "750803447", "750803454", - "830100558", "910100015", "910100023", "920100013", "920100021", "920100039", "920100047", - "920100054", "920100062", "930100011", "930100037", "930100045", "940100027", "940100035", - "940100043", "940100050", "940100068", "950100016" - ) - // radiotherapie & dialyse exceptions val GRG_GHMExceptions = List("28Z14Z", "28Z15Z", "28Z16Z") diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrCeSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrCeSource.scala index 7f415386..9a5ac54d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrCeSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrCeSource.scala @@ -16,6 +16,7 @@ object SsrCeSource extends DataSourceManager with SsrSourceSanitizer { override def sanitize(ssrCe: 
DataFrame): DataFrame = { ssrCe + .filterSpecialHospitals .filterSsrCeCorruptedHospitalStays } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFilters.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFilters.scala index 86e1650c..c0d9a54c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFilters.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFilters.scala @@ -1,11 +1,15 @@ package fr.polytechnique.cmap.cnam.etl.sources.data +import fr.polytechnique.cmap.cnam.etl.sources.data.DoublonFinessPmsi.specialHospitalCodes import org.apache.spark.sql.{Column, DataFrame} private[data] class SsrFilters(rawSsr: DataFrame) { - + /** Filter out Ssr corrupted stays as returned by the ATIH. + * + * @return + */ def filterSsrCorruptedHospitalStays: DataFrame = { - val fictionalAndFalseHospitalStaysFilter: Column = SsrSource + val fictionalAndFalseHospitalStaysFilter: Column = !SsrSource.GRG_GME.like("90%") and SsrSource .NIR_RET === "0" and SsrSource.SEJ_RET === "0" and SsrSource .FHO_RET === "0" and SsrSource.PMS_RET === "0" and SsrSource .DAT_RET === "0" @@ -13,6 +17,10 @@ private[data] class SsrFilters(rawSsr: DataFrame) { rawSsr.filter(fictionalAndFalseHospitalStaysFilter) } + /** Filter out SsrCe corrupted stays as returned by the ATIH. + * + * @return + */ def filterSsrCeCorruptedHospitalStays: DataFrame = { val fictionalAndFalseHospitalStaysFilter: Column = SsrCeSource.NIR_RET === "0" and SsrCeSource .NAI_RET === "0" and SsrCeSource.SEX_RET === "0" and SsrCeSource @@ -20,6 +28,14 @@ private[data] class SsrFilters(rawSsr: DataFrame) { rawSsr.filter(fictionalAndFalseHospitalStaysFilter) } + + /** Filter out Finess doublons. 
+ * + * @return + */ + def filterSpecialHospitals: DataFrame = { + rawSsr.where(!SsrSource.ETA_NUM.isin(specialHospitalCodes: _*)) + } } private[data] object SsrFilters \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSource.scala index 787f074a..500cc119 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSource.scala @@ -15,22 +15,22 @@ object SsrSource extends DataSourceManager with SsrSourceSanitizer { val ETA_NUM: Column = col("ETA_NUM") val RHA_NUM: Column = col("RHA_NUM") val RHS_NUM: Column = col("RHS_NUM") - val MOR_PRP: Column = col("MOR_PRP") - val ETL_AFF: Column = col("ETL_AFF") - val MOI_ANN_SOR_SEJ: Column = col("MOI_ANN_SOR_SEJ") - val RHS_ANT_SEJ_ENT: Column = col("RHS_ANT_SEJ_ENT") - val FP_PEC: Column = col("FP_PEC") - - val NIR_RET: Column = col("SSR_C__NIR_RET") - val SEJ_RET: Column = col("SSR_C__SEJ_RET") - val FHO_RET: Column = col("SSR_C__FHO_RET") - val PMS_RET: Column = col("SSR_C__PMS_RET") - val DAT_RET: Column = col("SSR_C__DAT_RET") - val ENT_DAT: Column = col("SSR_C__ENT_DAT") - val SOR_DAT: Column = col("SSR_C__SOR_DAT") + val MOR_PRP: Column = col("SSR_B__MOR_PRP") + val ETL_AFF: Column = col("SSR_B__ETL_AFF") + val MOI_ANN_SOR_SEJ: Column = col("SSR_B__MOI_ANN_SOR_SEJ") + val RHS_ANT_SEJ_ENT: Column = col("SSR_B__RHS_ANT_SEJ_ENT") + val FP_PEC: Column = col("SSR_B__FP_PEC") + val GRG_GME: Column = col("SSR_B__GRG_GME") + val NIR_RET: Column = col("NIR_RET") + val SEJ_RET: Column = col("SEJ_RET") + val FHO_RET: Column = col("FHO_RET") + val PMS_RET: Column = col("PMS_RET") + val DAT_RET: Column = col("DAT_RET") + val ENT_DAT: Column = col("ENT_DAT") + val SOR_DAT: Column = col("SOR_DAT") val Year: Column = col("year") - override val EXE_SOI_DTD: Column = col("SSR_C__EXE_SOI_DTD") + override val 
EXE_SOI_DTD: Column = col("EXE_SOI_DTD") val foreignKeys: List[String] = List("ETA_NUM", "RHA_NUM", "year") @@ -40,28 +40,7 @@ object SsrSource extends DataSourceManager with SsrSourceSanitizer { * https://datainitiative.atlassian.net/wiki/pages/viewpage.action?pageId=40304642 */ rawSsr + .filterSpecialHospitals .filterSsrCorruptedHospitalStays } - - def read(sqlContext: SQLContext, path: List[String]): DataFrame = { - readAnnotateJoin(sqlContext, path, "SSR_C") - } - - private def readAnnotateJoin(sqlContext: SQLContext, paths: List[String], joinedTableName: String): DataFrame = { - val ssrSej = sqlContext.read.parquet(paths.head) - val ssrC = sqlContext.read.parquet(paths(1)) - ssrSej.join( - ssrC.addPrefixYear(joinedTableName, foreignKeys), foreignKeys, "left_outer") - } - - implicit class TableHelper(df: DataFrame) { - - def addPrefixYear(prefix: String, except: List[String]): DataFrame = { - val renamedColumns = df.columns.map { - case colName if !except.contains(colName) => prefix + "__" + colName - case keyCol => keyCol - } - df.toDF(renamedColumns: _*).withColumn("year", year(to_date(col("SSR_C__EXE_SOI_DTD")))) - } - } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/value/IrNatSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/value/IrNatSource.scala index 97125f62..20743b99 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/value/IrNatSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/value/IrNatSource.scala @@ -1,3 +1,4 @@ + // License: BSD 3 clause package fr.polytechnique.cmap.cnam.etl.sources.value From 566e7cab03a79ddf7f7c29ba089f04a401a4f838 Mon Sep 17 00:00:00 2001 From: thomashdh Date: Wed, 4 Mar 2020 17:25:50 +0100 Subject: [PATCH 21/38] DREES-108 : remove Sources/data/SsrSourceSuite test because not needed anymore + some comments DREES-108 : fix SsrSource HadSource and SsrFilter test --- .../cmap/cnam/etl/sources/Sources.scala | 2 +- 
.../cnam/etl/sources/data/DcirSource.scala | 2 +- .../cnam/etl/sources/data/HadFilters.scala | 4 +- .../etl/extractors/ssr/SsrSourceSuite.scala | 4 +- .../etl/sources/data/HadSourceSuite.scala | 26 ++----- .../etl/sources/data/SsrFiltersSuite.scala | 14 ++-- .../etl/sources/data/SsrSourceSuite.scala | 74 ------------------- 7 files changed, 21 insertions(+), 105 deletions(-) delete mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSourceSuite.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala index 0254c1f8..6d18dee1 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala @@ -69,7 +69,7 @@ object Sources { /** Read all source dataframe. * - * @param sqlContext + * @param sqlContext Spark Context needed to fetch data * @param paths * @return */ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirSource.scala index 1b6440b2..2272c55e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/DcirSource.scala @@ -15,7 +15,7 @@ object DcirSource extends DataSourceManager with DcirSourceSanitizer { /** Sanitize the dcir with usual filter for analysis * - remove the lines without a proper *Nature de la prestation* * - remove the lines for information - * @param dcir + * @param dcir the data source that will be sanitized * @return a new instance of the Source, with the sanitized data */ override def sanitize(dcir: DataFrame): DataFrame = { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala index e547e8bb..c0f24456 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala @@ -6,7 +6,7 @@ import org.apache.spark.sql.{Column, DataFrame} private[data] class HadFilters(rawHad: DataFrame) { /** Filter out Had corrupted stays as returned by the ATIH. * - * @return + * @return dataframe cleaned of HAD corrupted stays */ def filterHadCorruptedHospitalStays: DataFrame = { val fictionalAndFalseHospitalStaysFilter: Column = HadSource @@ -19,7 +19,7 @@ private[data] class HadFilters(rawHad: DataFrame) { /** Remove geographic finess doublons from APHP, APHM and HCL. * - * @return + * @return dataframe without finess doublons */ def filterSpecialHospitals: DataFrame = { rawHad.where(!HadSource.ETA_NUM_EPMSI.isin(specialHospitalCodes: _*)) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala index 32599047..5a0a16a6 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala @@ -22,8 +22,8 @@ class SsrSourceSuite extends SharedContext with SsrSource { ("Patient3", Some("C679"), Some("B672"), Some("C673"), None, Some(2011), Some(5)), ("MustBeDropped1", None, None, None, Some("31122011"), Some(2011), Some(12)) ).toDF( - "SSR_C__NUM_ENQ", "MOR_PRP", "ETL_AFF", "SSR_D__DGN_COD", - "SSR_C__ENT_DAT", "SSR_C__ANN_LUN_1S", "SSR_C__MOI_LUN_1S" + "NUM_ENQ", "SSR_B__MOR_PRP", "SSR_B__ETL_AFF", "SSR_D__DGN_COD", + "ENT_DAT", "ANN_LUN_1S", "MOI_LUN_1S" ) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSourceSuite.scala index 9274e78e..e28694c0 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSourceSuite.scala +++ 
b/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSourceSuite.scala @@ -13,30 +13,20 @@ class HadSourceSuite extends SharedContext { HadSource.SEJ_RET, HadSource.FHO_RET, HadSource.PMS_RET, - HadSource.DAT_RET + HadSource.DAT_RET, + HadSource.ETA_NUM_EPMSI ).map(col => col.toString) val input = Seq( - ("1", "1", "1", "1", "1"), - ("1", "1", "1", "1", "1"), - ("1", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0") + ("1", "0", "0", "0", "0", "100000000"), + ("1", "1", "0", "0", "0", "100000001"), + ("0", "0", "0", "0", "0", "100000001"), + ("0", "0", "0", "0", "0", "910100015") + ).toDF(colNames: _*) val expected = Seq( - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0") + ("0", "0", "0", "0", "0", "100000001") ).toDF(colNames: _*) // When diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFiltersSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFiltersSuite.scala index 6b332fe1..be45b2c1 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFiltersSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFiltersSuite.scala @@ -14,20 +14,20 @@ class SsrFiltersSuite extends SharedContext { SsrSource.SEJ_RET, SsrSource.FHO_RET, SsrSource.PMS_RET, - SsrSource.DAT_RET + SsrSource.DAT_RET, + SsrSource.GRG_GME ).map(col => col.toString) val input = Seq( - ("0", "0", "0", "0", "0"), - ("1", "1", "1", "1", "1"), - ("0", "0", "0", "0", "0"), - ("1", "0", "0", "0", "0") + ("0", "0", "0", "0", "0", "900000"), + ("1", "1", "1", "1", "1", "600000"), + ("0", "0", "0", "0", "0", "800000"), + ("1", "0", "0", "0", "0", "900000") ).toDF(colNames: _*) val 
expected = Seq( - ("0", "0", "0", "0", "0"), - ("0", "0", "0", "0", "0") //filtered + ("0", "0", "0", "0", "0", "800000") ).toDF(colNames: _*) // When diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSourceSuite.scala deleted file mode 100644 index d3465571..00000000 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSourceSuite.scala +++ /dev/null @@ -1,74 +0,0 @@ -package fr.polytechnique.cmap.cnam.etl.sources.data - -import fr.polytechnique.cmap.cnam.SharedContext - -class SsrSourceSuite extends SharedContext { - "sanitize" should "return lines that are not corrupted" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - // Given - val colNames = List( - SsrSource.ETA_NUM, - SsrSource.RHA_NUM, - SsrSource.RHS_NUM, - SsrSource.MOR_PRP, - SsrSource.ETL_AFF, - SsrSource.MOI_ANN_SOR_SEJ, - SsrSource.RHS_ANT_SEJ_ENT, - SsrSource.FP_PEC, - SsrSource.NIR_RET, - SsrSource.SEJ_RET, - SsrSource.FHO_RET, - SsrSource.PMS_RET, - SsrSource.DAT_RET, - SsrSource.ENT_DAT, - SsrSource.SOR_DAT, - SsrSource.Year - - ).map(col => col.toString) - - val input = Seq( - ("10000123", "20000123", "123", "C66", "C24", "200910", "14", "Z15", "1", "1", "1", "1", "1", "14062008", "25082008", "2008"), - ("10000123", "20000123", "123", "C66", "C24", "200910", "14", "Z15", "1", "1", "1", "1", "1", "14062008", "25082008", "2008"), - ("10000123", "20000123", "123", "C66", "C24", "200910", "14", "Z15", "1", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "124", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "125", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "126", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", 
"127", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "128", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "129", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "130", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008") - ).toDF(colNames: _*) - - - val expected = Seq( - ("10000123", "20000123", "124", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "125", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "126", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "127", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "128", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "129", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008"), - ("10000123", "20000123", "130", "C66", "C24", "200910", "14", "Z15", "0", "0", "0", "0", "0", "14062008", "25082008", "2008") - ).toDF(colNames: _*) - - // When - val result = SsrSource.sanitize(input) - - // Then - assertDFs(result, expected) - } - - "readAnnotateJoin" should "return annotated joined SSR given SSR_C and SSR_SEJ" in { - val sqlCtx = sqlContext - - val ssrSejPath = "src/test/resources/test-input/SSR_SEJ.parquet" - val ssrCPath = "src/test/resources/test-input/SSR_C.parquet" - val expected = sqlCtx.read.parquet("src/test/resources/test-joined/SSR.parquet") - val result = SsrSource.read( - sqlCtx, - List(ssrSejPath, ssrCPath)) - - assertDFs(result, expected) - } 
-} From 813750cc4fd2735fca23e77b059b57244b27aa41 Mon Sep 17 00:00:00 2001 From: thomashdh Date: Thu, 5 Mar 2020 12:07:59 +0100 Subject: [PATCH 22/38] DREES-108 : change Parquet input file to test-input/SSR_C.parquet --- .../etl/extractors/acts/SsrMedicalActsSuite.scala | 8 ++++---- .../etl/extractors/diagnoses/SsrDiagnosesSuite.scala | 12 ++++++------ .../SSrHospitalStayExtractorSuite.scala | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala index 8587be84..a54a5fa1 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala @@ -13,7 +13,7 @@ class SsrMedicalActsSuite extends SharedContext { // Given val ccamCodes = Set("AHQP001") - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet) val expected = Seq[Event[MedicalAct]]( SsrCCAMAct("Patient_02", "10000123_30000546_200_2019", "AHQP001", makeTS(2019, 8, 11)), SsrCCAMAct("Patient_02", "10000123_30000546_300_2019", "AHQP001", makeTS(2019, 8, 11)) @@ -32,7 +32,7 @@ class SsrMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val expected = Seq[Event[MedicalAct]]( SsrCCAMAct("Patient_02", "10000123_30000546_200_2019", "AHQP001", makeTS(2019, 8, 11)), SsrCCAMAct("Patient_01", "10000123_30000801_100_2019", "AHQP005", makeTS(2019, 10, 20)), @@ -54,7 +54,7 @@ class SsrMedicalActsSuite extends SharedContext { // Given val ccamCodes = Set("BLR+156") - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") 
+ val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val expected = Seq[Event[MedicalAct]]( SsrCSARRAct("Patient_02", "10000123_30000546_200_2019", "BLR+156", makeTS(2019, 8, 11)), SsrCSARRAct("Patient_02", "10000123_30000546_300_2019", "BLR+156", makeTS(2019, 8, 11)) @@ -73,7 +73,7 @@ class SsrMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val expected = Seq[Event[MedicalAct]]( SsrCSARRAct("Patient_02", "10000123_30000546_200_2019", "BLR+156", makeTS(2019, 8, 11)), SsrCSARRAct("Patient_01", "10000123_30000801_100_2019", "AAR+254", makeTS(2019, 10, 20)), diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala index fcc8e363..99fb9190 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala @@ -13,7 +13,7 @@ class SsrDiagnosesSuite extends SharedContext { // Given val dpCodes = Set("C66") - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( @@ -35,7 +35,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( @@ -58,7 +58,7 @@ class SsrDiagnosesSuite extends SharedContext { // Given val linkedCodes = Set("C6") - val ssr = 
spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( @@ -80,7 +80,7 @@ class SsrDiagnosesSuite extends SharedContext { // Given val associatedDiagnosis = Set("C6") - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( @@ -101,7 +101,7 @@ class SsrDiagnosesSuite extends SharedContext { // Given val cim10Codes = Set("Z100") - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val expected = Seq[Event[Diagnosis]]( SsrTakingOverPurpose("Patient_02", "10000123_30000546_300_2019", "Z100", makeTS(2019, 8, 11)) ).toDS @@ -119,7 +119,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val expected = Seq[Event[Diagnosis]]( SsrTakingOverPurpose("Patient_02", "10000123_30000546_200_2019", "Z400", makeTS(2019, 8, 11)), SsrTakingOverPurpose("Patient_02", "10000123_30000546_300_2019", "Z100", makeTS(2019, 8, 11)), diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala index 6f8f52e4..12ade053 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala @@ -13,7 +13,7 @@ class SSrHospitalStayExtractorSuite extends 
SharedContext { val sqlCtx = sqlContext import sqlCtx.implicits._ - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val sources = Sources(ssr = Some(ssr)) val expected: Dataset[Event[HospitalStay]] = Seq( @@ -33,7 +33,7 @@ class SSrHospitalStayExtractorSuite extends SharedContext { val sqlCtx = sqlContext import sqlCtx.implicits._ - val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") + val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") val sources = Sources(ssr = Some(ssr)) val expected: Dataset[Event[HospitalStay]] = Seq( From dce824517bce6e4cda35b1f25c9228bbf8393d8c Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 10 Mar 2020 11:40:46 +0100 Subject: [PATCH 23/38] DREES-108 : upload new Parquet Test file with valid column name --- ...f7-a863-d035d32207e6-c000.snappy.parquet.crc | Bin 204 -> 0 bytes ...b1-bcfe-620d052a165a-c000.snappy.parquet.crc | Bin 0 -> 204 bytes ...9-44f7-a863-d035d32207e6-c000.snappy.parquet | Bin 24743 -> 0 bytes ...3-4ab1-bcfe-620d052a165a-c000.snappy.parquet | Bin 0 -> 24680 bytes .../extractors/acts/SsrMedicalActsSuite.scala | 8 ++++---- .../diagnoses/SsrDiagnosesSuite.scala | 12 ++++++------ .../SSrHospitalStayExtractorSuite.scala | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) delete mode 100644 src/test/resources/test-joined/SSR.parquet/.part-00000-38c76b26-d209-44f7-a863-d035d32207e6-c000.snappy.parquet.crc create mode 100644 src/test/resources/test-joined/SSR.parquet/.part-00000-4659fc3c-6a63-4ab1-bcfe-620d052a165a-c000.snappy.parquet.crc delete mode 100644 src/test/resources/test-joined/SSR.parquet/part-00000-38c76b26-d209-44f7-a863-d035d32207e6-c000.snappy.parquet create mode 100644 src/test/resources/test-joined/SSR.parquet/part-00000-4659fc3c-6a63-4ab1-bcfe-620d052a165a-c000.snappy.parquet diff --git 
a/src/test/resources/test-joined/SSR.parquet/.part-00000-38c76b26-d209-44f7-a863-d035d32207e6-c000.snappy.parquet.crc b/src/test/resources/test-joined/SSR.parquet/.part-00000-38c76b26-d209-44f7-a863-d035d32207e6-c000.snappy.parquet.crc deleted file mode 100644 index cc1ee6137a9bf9e74d3b5363683a4ec0c1833d4e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 204 zcmV;-05kt%a$^7h00ICtv*A%Ns4E_wprP=;i&z66A4v-|K+acY!A&N*wPW>C+wfA~(*N GY`bADp@!FmM91g zxK4ANF|<4cYS#LjQ_liZS9;w9QzyIH!E2)G!iQRu8y)x@%2o-Wvsriz3@O_U17Cu{ zJaJ^AIcHc{VVcomtT3LV1*gP;m>mGQ0AS$BHQ{6n*+Fcvst^g9<5fl z`k_ZF4}w|35KI`BV8ReWb{Ik!f`6DW8$u96Hh6~+W=(#CtQkUfHw;4vCWJ5>LI`_K z-RiEo_m*U}T0fTE4MVHzR@HY-oqF7=I;TBhTXax1%Zl6=kXvN4$<%$W!(=iI4#0nd zL+53ad`#{=C$~YFHo2ou>8RzfZx+*8!Bc>A^tHfeYrClxY7fZ60r&$Fl*duRd6Y0R zJX-&LVPFuX9hWUk+9;4oImlDgZ%_j&A3mj$rB)!y9db8S-rW74?Ix2!X)``m$R{Yv zrL;<#s?1jSJ~S|Ze;^nCKt5>UlS8zD5B~Blc$(b^ zChV!eayVbk1rqae@&<9IWh)hT*JIF=rlm|JcnJP_ z*5`6#0=gLsD2GV40X8gOps*jM+aPx$QH^iRqm{Pi#lyG? 
z`5}rw;lWpwLIkzDQl~)eeOB+O8kS|eDeYqaU^~|gWX430ryK^e#s^UnDJEI%1fIv)}nxph+>lj4v zD^e%q0!q7#n(?VZK5<22DR`_|HlZ3-wxbX2a<@v;ER%{EpDJd|M;c4P1MPa@kpmQW z=1MQ*hANIoZoag*v@tbb-kP%eHdp3xEUi_tlWE6_TVR8ZTAtD?s%;RdU!tm!Ts0fc z*{jttR5%{;By7QnihtRavXvJF0=u{x%28c;gSba!J8j{8n`l;NbSpbGoh@g|MfwI%ij6LaCW6wT*}X^jRnan9e4N_ zBJ-&&kvvfyrK48EAd-L0kVVWF7_&_VviVSAeJ-?`j-k0y0zpy=7A}2Z13n;!srjrg z;Tw20wrQKp$J1G-pS%rxQQL|ww&k13R-&8HwV;1hAnL@Hv6#}up$uaAVUBNNS1C1t z^4#lb&&F&yvf?SO=BgWW^Hq@zPY8-BW@H!y@oxqckS!**rne%o%1m~A$~k3E%*Kzw z zvNJ!oS@8G;vU^3flulO1AT}gNX}|u>%eYQ?D~?ibs!$HjFXU2N9&Gda=E|Di=3Aff z`0Sf`uTLQNn7$0`v9utA;DcLN&dTR#6M6&qS{t}9G%)y6{5Cj%-vs)$sX0oUT2`YR zZngPME{=xPKzwa$VRn6~5-cr`O-%dews6sPRI7p6Qpr^ijsrm@rIS_B2*phr*;A=v^#J4&gfddN^fr4zAH$zFo(Z(@)0ZNXHKuP{+1CMzrBO{;!zhlDP zVYL#8Y}yt*i_XB*oNFSp5yv*9oC1Z88_Oy!DW^s~?BjFt0BxlujtlY(zOAQM7jtt3 zXCP56Eja{mX+}AsDPzF$n-3hAp7Gp7d3q{Rtl$I=W}G>1d2M;IVoPUIK9PuH`Z7u< zW9%x9jMUY+aKQ`jgIHl@e!YV4hqZEWGM00sCdv^(i@LaM7^B+i26033C^g#2C$R@| zP81r0&6Lv@s7!jCnfy#vAf+YeRF3M38Q}Z^h=aQlDF$q|_@*sU-pbCVRu%)x32cyS zb}nC@Ex?XACJ@yts-|>tl?;OUnTLG_RNc(Or+e8v|L-QjjHbPR&m#8w`eMgD9@G*oRk04&4=rhad6;M#f+c0EVC3m;Fe`=>zX5- zlF!hNICkk@|LXzIegF1@?Vn~Z{_~6eZ*2N0p>)(rC>^yjpr_y|=&S9HetC$BxqAv% zM{l5*EzDF)@Ge~RZOgE&bDlNl_Q#>2JjjAECnBTB4@FRN5F_}%$HvG7evg( zs?t%*H{kdqD;gFDq8_4{@hR&jmNMY@Hv?SbnQWd&815$7*AYD&THUUe8d2z%K@C=a zY1rH_{F-g)~bZb^I*XFR+d@Qp>niSm*_$5tq)^k|D|)WkW+@8Pglf~iz#!Cx+; z!Pc_TH38R*&y|i^NduC9C!(UI&Di`*I1_VEy5iLm-oLrx!F<8)aZg0XMeoEOZ4ITH zFK-aXuOj#!0gZLbIomqHOxSTFWtRCf4;nhX?8d5&>2lP)Jf0~Amp2lNj>U97Jufgz zD->6HMTHG2^OsRL(>B)v)##Qx=1$C4?D)Qb>35N&XeXv!;HTxo+S{phv_ zq7M&_jFQo9i#|LwGAw9HJ1qsHTP>r(;^^itKXE%1!yz8cge$%oIBkpe1X4`tWCb)p z9EX?W$LJbyZ-uN5*#)0%dSeO)(B@KVJzbjGnw-gmrbTN|kG2G?9kjMV41ZWBU0f#j z*i(L|!{yk@u8upMi(4xKleDJGZsn-1oB_|T)OZfErBboN7_ND4(S>k}ds%5y%NS4| z*+&g>#)(EKR;PW@ihHseE{|<4Nymr1i}uKd!?qIfyJxea`-L8D6{Sa8!yu7gSFt;> zU-gzycSY>dN!=-7N%&9uI^pMKa|<9pHgH1b3Z7|4b!^O?cBPARo@%vd52KAYHudV_ z`)OlS=@r%3RbjlS0+?Qr&&rTs{s}GzBb8liNhU1Th4YC-7BT2BWw38DJT3Zd1q|R=2|Z=hpQWb 
z$#}p8fQR3}E^BSYQ@)@Cs>mSF74V|ch=4;Vo-fHsv^Su#a7NdTucH@WRcZ3(^Z#;n z{!EUY5l&T>GWkk!U`Ue^`BXbuS%rN!ZjtmZ+rdHgRtVRBxIfzI4ymJ-2QFe-11hiq ziSH*p8a%1wathId@ZS#CQ|JRGqEM>|*m9ql+N1}@;|py)2AWpF@g8Mt6TkOmT8eMcW|1F5ZaYau>W z%=n2Ku@qF`*20#P-#_8$F0ZCa%gOTGVrC^Jzb%>M?gv|xc30e%3?+gOg84`~>Zzw& zAm3cgq)PRyn4#!|D_XDWc7>9fbkt!;yMr!P9`d8EcrD%KhID*9oODGwX4I`?xRY9j zTgPzo3~WMId^(&&-JoeE8K1P<+^(c86r$~Nj60Jt*Hlf9cDvJ?glC$x(-p=v);=2b z#GrN@>iCkbP+X-P4UT(}nT(A`F{f4=E|OwN)PUKz)0T+q9(B=f06oWRHQRxm403v4 zIvky-=c6P(AJa}(te)?5*+CQ5wU{3BhA^L40riJ=P%BXVYqi^a;iPA>-dTqrsisr| zh8#(U$E#Lt8w)4HE=R3qdl2*pM~S`Y5DO%2V`KGvkIfExvV3Paph?>TUQOCD9@3`6 zaZNhvs@EsVh3sGlH}o#14r9s@vqht7x`U)0wxF6k5_Mq*g`>V?+!Lmza0lbHflh#~ zDLW1-YBo%T?7n)+!%~L>UR2HLb(!Q&5Z)1Uj7MEbl#P~XBjcdnL3hPspc<%>#GzRm z<|y$b%OUwrXd(7~0<=cZab3R6L1qzF<+5B;FkXT&+DjH)oiK=`^$@Ag#!0bDHnN@L zq|Z^RzBU>;>VoMeLQmMBzijn+uWF1vq_G3m>?Jy@S)>p7dCAwag~Q2MqCTWfaP?q@ zBm-VoqBi_^Cgz20@#NrcnM1WqnWJJ)dP8xSIvvpjKjLy70UIopU;~7QW9m@h1#^QE z+sW(e7!N{Q)M?Acf@%XC17JBAVgWK1^!YWBs%ENKUTe&+<)K{Ogj|neRu4AEY3PyK zW0+rjUy#R+WHRK%ey?|J{jtp}dJK!M__0s)7#l8PCxS3HBDGGciKM2|KPGFWI`+_b zssWNo*OUtuZdi8W&Ih|yyBV{_Fbi#?PTV9czTRXY5lRl~rgW0L2K%1lgBH#esGL*5%edxrM4qb^ru)MY6mkSueL_h;RkNy?3EPeD zPS|b`Ud6I$y=q3aSOnh5K4=ki0JiAGL%J5h=7LoG56SX{1ld^ij=^*SjlON;z;xF_ zz#mJNPbKTOZ3x~yW==r;nD1ZQ%hc5U2g&j)$@*~zQZ=!10?1c~dDQ>{n;oLs?q43?AWnd}+08KgE{&j`Z?U}CV!hdosJ+L# z38Z}5!=$vk)qNk4@N3EPTgm!q4@xk^xCubNIHm*DZ1|mIx!hv?#W4i!F@jBif8n?e zUo+|-CCkT>^@Zbz-z`*4!2Ik4V}gb2dsx!`ZOL*&vVL{~xjV(C37D^gQ@J*Y`8_aU zs&n~u8RjycB!zx90rMRTW7c-jgm+c4yee7Wu>g++rdeRTvy@4|`{Zuc?smElC%jK3 z%bz9dCwG%Rze6mVfV&OrNPVrP`+34uXZZF>t}~b{`lPFZkG{A4`c0d--0bs{Uv~xa*c@t(Wh;1QBO^(3wEP z=k8-9?l4n_Ja-?qT8%mrRR4jm&JI4L`bU!GmSp{b0Co*96L3GdpQ**4o(PzZp8HX= z-aAB+3E0~YFm~+}M!4!Cy!`-au`qBPlTjs|BqLi@91xh+}0KZCS8$A$^WUsyRZ zJq;4FIv&5U60+MEEJmZi1ju*K>X2}tBuI7beD^Ft?k&bkfZu+I!_)IAXN&EJ5P#<| zE&=(?b2>7dbP4$j$?~;iee)cW_YkNhK)>40=4d-TG!wKsH(%{1fy<`J{cfgb*C^a0r$Nj##PV#9QM5-#NIK`N`QTH zm@}hIy+Gj1_~tN}aYrYifcN_mRZZMi(klppdlpu<-;bb{d<+)9vD_YI4XoW(kgCs1 
zmKP=K_9#|nK~@6UA3nll!xb92-yj6s(|-7fTE7;0#fyIb0wFu9!BzgryNa`!|?VllO_dqED_3F<#D!s_z zs8@fE)ZOEu1k`UH<*Dj*5TRa|EbmCxZyrUeF#bsZyY?6dqjyOJrmh^<9z)n3BAf*1 z*X-Po$HO-qn+aN7IbO5FkZ)xB^_GM2*u@q{e9O@w@efW$tY3|Bz&|(}1pdjzfZEG7 zj`}B8gVe9axIUuSbAlhF8af`$3L( z!`mS5mQTYYH;f$bmJfL7W@Di>EG60v-u5#dyQAcYxBU$gF9-YNPI^O7zvU#r8x?}IK?%C(W{CATy*DNJ-%6J2lJ#bY%y47WlYn$N%t*#7R{~W}FPFmvstJ1% z@Lr2BWA1!W%W+?efN@#ulLcN(EG6o$?I_b&y@n!}#N;X%?qmsCUEjB(sIoB*N}%?a zF{U;Ltz8lmsJbkD8EbI3K9^wn>TiPSH8a7htMhXS)L9r8B~bgFiJjT>j+(I5z0*4r z$kxY339z3{?u?~3p#-bG{63pR?Cud!0wLSe%;b8z;M)Lh$T_obPlMST*#LbsvvV8h z9Xr9Rr-6@Vh}n0Jj}kQCH;*$3>X^}7(-XRS>+ze%k-l3@lmP$T6AaJaj@PgHi2!w^ zfA<6mFvdg)ux}(8me-m(2?$iZ0DB{eP+d@zfcU|ajL6^B*L({EsV;mUJc-DiW1$4} zmuDHBH$&Y!LGbF?^W|B@i{qdK=#S@iM-LJ7i<0FkTu{y-nvaGO0KZH#An#dV{t!Zh z1Ni5X^~*G|#T_D`1X6C!vx&2dUxkQL*Y2D1^)(AGMl7%hs2eL16(taHdx44IjgIq@ zj6VXWw=WBII6@g5Q=$d@@)ENWKVE9S9m1wB$|Yj;9pj?}s^82q)zue}$UDU8e=`gE z9|Bc{VN(JLpD&9gpmETprTUOK37;=FXrXU&OaecSP=bN}ZLUH3l{~XQzXI?pyZSP^ zl1K9k!>a^#xlvHLc;u{k!l>((7~?&1Q+uOuX>qJgmutYl|6uNuNgtqr&tHwUfToc%XK3Nd|K#*Bp~%D|K&Q4EEah+q=~{346f^& zMlAJW60GXSyS|B7QS_An`Io8@Nj;z(@-J0_6hvJKkgq<)CayZQG~QE!RhQ&fpMqJ- zVy!Zp~pD%yH5j}#a|(sr9|jk&oDz6y|;w?tmS#hdg~c9MJ*;v(EQJy zHG(keM|JiR&`Xl#6>!%)3o5dJEL4%D1S(ptZCTn(C;OBi7fK8Lsnrz3Y4wwkt^RT` z*AKB<#kEYOzf{=hU&&Nb=~N}9{QJ;Trz_Rf%;}4#%azjFT;=q+)ALK2T)KSv;@>@W zI-gp>EE-w_d93VGegV??wOlSWn}b47J9F-I4Sf3IQ_nnet`4Bi)DwtYKlZGb-4P$JAKwYkk(CE~nzSZUc;6~j)Ko^b4KnZxoF01nCh1E|}P0roq(8KKcp ztp)@)Re!RH7Z~*zGWiW#Kn+u7fJO~K#%Va)vvVRgtI#4DcvMG^f~5L2#+>7e}? 
z5kofHUgjD)IHu|hIvC!8ta7_m4uC`4JWX?` zm#1kCb@DXLq4Te44t3=;&EbCi##rECXkgb#mMY#8G}a`E{kD#Q_A||3dNBV?qo^mI zF)2RkdimRx(y8@Ksk~IkpBn5R9Oxf7b#``bDVIJqaDFf~IywUXtm98J=V#$xE`Wd1 xad0R#cwzYb@Z89Sq1p5!!*h?EPyHBv1lVUX{r~@T7BKzeWAHCq!2dA&8PK-cmx>SS{W?7Mk0&<6JHktY_^q5Sh(GmDJ zI`*h+l26Ek7vwG|(kHpWP;tL1{BSRmdkO z%cZnRn&b)j3Q9MTwu1Cdkgjw>lcC^!%c{&y_&zo=f`1?v|3E%y;gdtOfe-!s9(bDD z3MTBSz-qWq$p;eInYDg+Xj_XX*7KfVEbWU5T6>S7lG4vt0Rn3}_K8#WP@!l4u@7?7 z;llK4w&)7@-Nzy6^%n}>kas0)^UrSD(gKNnTn**8uDn6qS=ma(-Sar~q-iBn4IY8N zp3TMlq=0V50?HARZGc^rFHzWc(p``{ov6jPvS_89#i^X%w^>UE?QsFMpU+pi)OrTI zReqS_PkFGHw%tJ-Yp$w&F`II&3b>t6MCqv)FaV#BpQ6CwH%~!sG34;Z7h`iF=X7M+ z8HvVXHGI62n29Xsmh!6}TWO}cyu6xS6399wD5jheAPs6fEDuvPV`Zh;MY>UqkbsJ20*eu=6@^0izz zZ?DxRQQ>&VlduJ+tNvA2%2rtx2<+o(D93f>4dQ+z+i43Q*haHDqdU3Txm+bvIR+`e zKff3Yta&GESz+UjX$mO4thPZA-^UW<8v7WZDrWq|USTPCpy}gYd4RsZj-0?!BL9CpK_vg0A&ZzVFln0!DSB-8*H-i3kfv8hE#$rkzhcbxehdI89UFFm? z%5!g~JzEQv$eO3Lp08~!W@{oFo)Q#O%*ZeZ;@=D?AXiH4%yv9utA;KN&2&dV2Q69xnLS{u1EHZpn{zm1OIH-WxgYL3#SmenYS zTWx-mi=$yZ5Z~BYTG(8v2Ft6H({ujC9b9xBwOU}ITy_19cLNSPVCr7s_*60D zCobceP#F%lU=R9PP6r3`k^+WO*+KU4h zaUbiLm>_>1!`Fm0tMMJZ-$bFqm5(Y5|kVthm!o!1|IWL#>YVge#eBn z!&)^G*|sfvmYspwMb~s@D~@eQIRy%xG?rC5QcjI}*vA*-5!y;i92ewyd|S`0FXtDF z&OoA8UU3NE(!6p^Q^tVhHy=1KJyZGV%G_+ERK*D#%sBJj%Esz))t1hrd?FDi^ktM@ z#@JUJ8L8`w;i4Db2eIN>cC(7_hmA^bCYE=krYjLai~6{17^B+i261EZ1U1^}r?3a| zP81r0&6Lv@sLpttnZkTdAf+SkRF3P48Q}Z^h=aQtDFtk{__i%k*~u-W)|LaS32cyS zVX;tID8i07CJ;3!s;2aDl?;OUnTLG_RNc(Orw7?Q|L-QjjHbPR-!k_5=5ooil!!!a zGqVfJeitqwo_Uw(7;{`#R54>gD9@G*oRk04&4=rhad6;M#f+c0EVC3m;Fe`=>)Ioo zkZMJX&Ub8km1CNs2JkugJO!V0B4@dVN5F_}(wE-!7e&nF zn$lCxH{kdqD;gFDq8_4{@hR&jmNMY@Hv?SbnQWd&815$7*Ks`@THURd8dvC-Q4Lmq zY1rN>!>_{F-mHBawmvUhJb6v=Sok#qyfpl6H(Ffb}Ty|&cxg^u6V7C_iwIvuu!yn+|!XM(K~TKTSMvR z%NxY;s|dbFKx5r<&bCf46ZYIlnPvXWgN9D8y0NNbt`c>xPGw5L)vd&`V>w+&X9Z^I zgyPDesIWn0{xS+@+V)1E7Ts~j+=*<}j_tr)|-mK#D27tbhiH zEuQ4vOTipu&qV>?uDG_eqlgcMH$f6Fi7OrRqRge zSG^_FT@kxplC;Yr@Zvo`T22RL)(KF|$O-{Pgu5@Y9Q>&HiVYKnqwq9L) 
zKW%L*gQ6PyDvTFZ0Mjib_*60DCoUu`1rPRo^|#mb!!siP7D1gtJH={v(LEX5!L=Z> z6kIFkL{~Z8G)w7aHJ}3D*CrV*akwLB$NAopd#xOGf&glB_b>b4NDsA3;{$H-n zpUJT^!l~*?rch0ejA>FLpK32FtFZ6JEt1}4J36Y~3gP+>_eXo(A@$Vrz(q`JKm|4+ z@%^MngC~_-P9b^}{_Sx+g+5?X4$*2U1DanZ|8>%2I_4HFllW9IV?HhwEVVE0HWYtQ zh{LCf8S@dxQU)Axw1WlE(TyX&~{{_YC1SklISO7UEOI zjGw3xOF;#0E$le`&6A$~%6h82nyf4?XVy~kTarocf2c$0cExSUP$Kvcn2)5Ro<_O@ z^3AnOs@%wm8H!H3qK&F*3 zbqqJpz$Wy?=fX+U4Vq?>@kzJM?Mm80A=)m-lrtG~&DQnkwmZE^c&14^U13aPZA&Z` zP1=)yivc7KReecUD6Ybe2dBJy>^!`ob=2z`s@fK z)s$+-kR$2vc-5-Ms9h$*$*{{&2Mo}sLC`83B}T+?R)6e@2a>kQ$$IH-kIfFsH;Q#T z!vRg&7Vv7)j;W9~9gb_#QCFir(KuuW2Ix?n1b(9<^PFI!_xP&LLL(%1oO4w81NS)>p7 zIm_3xg~Q2MqA{dSarI!@Bm-VoqCWh1Cgz20@#N@!nPc@#nWJJ)dP8xSI!Dn2-{Eo{ z0UIo%U;~7QW9m@h1#^QE+sW(ems%ENKUTe&+ z=b>EQgj|ne7MZ+GLyy!S!~Ej=f;@I4lOZqmd!uU`k8NJjV_1;IkA0%Y*l>>B4R17< z6p?!GkTsMWR^4QcREHVbsn0d*f)yK}+ zBTLumq0`?yVeJ```%jN|^_WjL-mr3qrOWb;Wcj^h?YYY0>ex4UgNq{UPG5kXlv(b; zlMwzfnXKi&~t`K4t2pa-d%us8wa^P{|K z0D*lCQSG;qWmmF(eiR|%C^!M_dY_I4(}B=_DOr9cS+DmYO&9wnz})I*7=A-WP(SIg zywG92)sLuyhqei%d^EtMbi36p9+B`X$?_Y?`q2PNFod%SKtDa91J!N#wPd;0Vg2a| z1RXF~O@RN|Ngcj!)IUj<4Bw=IlW-!K#2i<0FP$@;bhcq|6Z0^^;fOak7A z_p)|((=9jQeI!}_B3VDYm-P8v!p;QTU06pNYc1Wn6RtYLcTXdC|L8IS_2v6?RM?af z>K`S``;ztL`;fX%9GL)lXNVbek23_3p!V0DA+RrA>9K1*mJ-?b+Wkyp{V9ZSH!aUv zuibwYV$Aq3Gl8PdJ-|rZxupSl?g4DP8fPY`{w-gfopDI@_a)11$@(n;>>E-h;C}dB zrWSutB49d*zKfa--X(TSz}|h3vFise!d2Je-3LjFg`r{s+BfHPG&rsi+FvBg9m)F5 zIi%e^8caa`)XI_R!HqzWe?*sPxW;qrUh{1EC5$?~>j{qjdh6^20xVAmh# zVD#FEz|`gB`r`;YK%kQV{i>at2YAkg6EZ=o%gL*Dm

l&E9k{9=pThh;KTYB>ut4 zh>cq?4)_OWlfXZ_7*KnU#!>(5YLfc-B-cmuCXVplmMrhWk!ce9XpgujLBHJaFeSU` zMINW#4NsHm-Sp~sa23e$ZhD*K-S%mCv&^KsC`%0^X|;X3V|sWI67u5il+bfwI7Bils!|wHsv`b62ccZAXF&s*u_GdAsHV3WW2NS5eEPWPha=1R1VEXDWed&!d!KyK~Isdb{A;04~Eh zv+vG<*_+uFy+6Nq8|Za9!K+7v_veY(_YRX1G~m}yFbVv%F!f9jx_Tq>>nD)DUucv7 z|Mim$&tHT$ZuN-(b)RJ|N~J&90VY?Ofb$EO&Pzm~7N z5eQOU`2P45BKHo763|~-V07LL4MzmQs|V7T77#BEhZ3MaSlk~yM$j)vmKWjjauLyd zP?P}pS(*WPCjxVQ5F(t)Kas4TrHL)>5(p)bax2Rw&OR;_B1&DmZ)F>67T%UvU=dI^ zRwOt|AmYvv6TurD=Or0`1WvDHmKtz`GB~D03;4AaW+i^S)ZI9QO<$BN#Oiy7NeNWH zm1C-_FCdX4h|~X84)i|)stTj01QI@36-z+lp!Z637jY6kS#8onU*(wuejK3$1O2Of zll12c%>Miez_0A;%jo$6nqL@YC9uoQBIUw4l$`#UFN_%DU2=l1e~?xtk0Ja%L1?X`hTm}2ftIpzoxp~NPWBl{9EY&SAbuZndKTF)lt^~eOW#L z=!=RGL|uCg&=-{hfIhE^K*Sl?0DWFP0O-z!5k%dG4bYv91Aso;WT3jkkr31~{AZgu zq*&zDm?jEKFsg2Bb3E$JB|O!ucVioQqOdCg>#sGAMV*}->#sG!62w^vSg$gP^+RQm>3G!LXPbBN@ zXRy`vfGa^uKYo@YFuyJ#{6ez444#>1K`9nyg-WrMKq<@h9ZQ$#^pNtsVtHvewVs0L zt6>s|HC!p>han=Xw2`R}my27&Ynf^)ovNmk_r{(+Tdl2U&R#iNsg^eut7k8q&8}qf z>B`wF|M2wLLTU}OXrvJ2v9c?LB}f-G^7+(49tuJ2%!RXc@YyR*Kl99m27vmFTL)ld z7^}TfsAiTj_~5|uiF=q4ps`Ii0Q|141E@Q=4S?$TZ4Uq*%k}`^lWY$_WA564kRZMG z0BzcVgaNjvLECn~-7gL*m|9GerY8msPHclfy{nAS?9`%e)b;@2F>M`y0aL~pOU7&3 zs=ByLMrzhJEReHx2J?!x&fr2t+kmLCrR@R0yV*K`Ix*V=fXlLV0Ch^X2LRt=>j3Iu zWPnyjHzPDVs?{*xw(8Hc@gAejK_EYS9PimI|A>nNxG!gA>aH#K%5t^N6 z)Z4}YOBn z&7m%xL*NjPPTL&n&}p4Re?ZncfX=wq0n}^LI)M948-sv{p^2R(Lrk`cV857Sgk~oX zbHy~vc=8#O;-g-czh5h#+02wHE5*W@(c#gN;n6ea7dBS%=`*QIm($rx*~^z79b1^l zWG5ybeKb8XF*0#EGd{9-dEv3kkBuy37Sf~NgP-;dnN0uxpUwiNg~#E4ngaem2L2nO C`6pBW literal 0 HcmV?d00001 diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala index a54a5fa1..8587be84 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala @@ -13,7 +13,7 @@ class 
SsrMedicalActsSuite extends SharedContext { // Given val ccamCodes = Set("AHQP001") - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet) + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[MedicalAct]]( SsrCCAMAct("Patient_02", "10000123_30000546_200_2019", "AHQP001", makeTS(2019, 8, 11)), SsrCCAMAct("Patient_02", "10000123_30000546_300_2019", "AHQP001", makeTS(2019, 8, 11)) @@ -32,7 +32,7 @@ class SsrMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[MedicalAct]]( SsrCCAMAct("Patient_02", "10000123_30000546_200_2019", "AHQP001", makeTS(2019, 8, 11)), SsrCCAMAct("Patient_01", "10000123_30000801_100_2019", "AHQP005", makeTS(2019, 10, 20)), @@ -54,7 +54,7 @@ class SsrMedicalActsSuite extends SharedContext { // Given val ccamCodes = Set("BLR+156") - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[MedicalAct]]( SsrCSARRAct("Patient_02", "10000123_30000546_200_2019", "BLR+156", makeTS(2019, 8, 11)), SsrCSARRAct("Patient_02", "10000123_30000546_300_2019", "BLR+156", makeTS(2019, 8, 11)) @@ -73,7 +73,7 @@ class SsrMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[MedicalAct]]( SsrCSARRAct("Patient_02", "10000123_30000546_200_2019", "BLR+156", makeTS(2019, 8, 11)), SsrCSARRAct("Patient_01", "10000123_30000801_100_2019", "AAR+254", makeTS(2019, 10, 20)), diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala 
b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala index 99fb9190..fcc8e363 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala @@ -13,7 +13,7 @@ class SsrDiagnosesSuite extends SharedContext { // Given val dpCodes = Set("C66") - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( @@ -35,7 +35,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( @@ -58,7 +58,7 @@ class SsrDiagnosesSuite extends SharedContext { // Given val linkedCodes = Set("C6") - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( @@ -80,7 +80,7 @@ class SsrDiagnosesSuite extends SharedContext { // Given val associatedDiagnosis = Set("C6") - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( @@ -101,7 +101,7 @@ class SsrDiagnosesSuite extends SharedContext { // Given val cim10Codes = Set("Z100") - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[Diagnosis]]( SsrTakingOverPurpose("Patient_02", 
"10000123_30000546_300_2019", "Z100", makeTS(2019, 8, 11)) ).toDS @@ -119,7 +119,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[Diagnosis]]( SsrTakingOverPurpose("Patient_02", "10000123_30000546_200_2019", "Z400", makeTS(2019, 8, 11)), SsrTakingOverPurpose("Patient_02", "10000123_30000546_300_2019", "Z100", makeTS(2019, 8, 11)), diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala index 12ade053..6f8f52e4 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala @@ -13,7 +13,7 @@ class SSrHospitalStayExtractorSuite extends SharedContext { val sqlCtx = sqlContext import sqlCtx.implicits._ - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) val expected: Dataset[Event[HospitalStay]] = Seq( @@ -33,7 +33,7 @@ class SSrHospitalStayExtractorSuite extends SharedContext { val sqlCtx = sqlContext import sqlCtx.implicits._ - val ssr = spark.read.parquet("src/test/resources/test-input/SSR_C.parquet") + val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) val expected: Dataset[Event[HospitalStay]] = Seq( From 0bf239989e296c55f6c490bcdfe9c69bb413e1f4 Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Tue, 24 Mar 2020 15:12:15 +0100 Subject: [PATCH 24/38] CNAM-450: First working version of the new Bulk. 
CNAM-450: Update ExtractorSources to include the name. CNAM-450: Format files. --- .../resources/config/bulk/paths/cmap.conf | 8 +- .../extractors/acts/McoCeActExtractor.scala | 7 + .../etl/extractors/mcoCe/McoCeExtractor.scala | 2 - .../etl/extractors/mcoCe/McoCeSource.scala | 22 +- .../ngapacts/DcirNgapActExtractor.scala | 107 ++++----- .../ngapacts/McoCeNgapActExtractor.scala | 81 +++---- .../ngapacts/NgapActClassConfig.scala | 10 +- .../extractors/ngapacts/NgapActConfig.scala | 5 +- ...PractitionerClaimSpecialityExtractor.scala | 10 +- .../cmap/cnam/study/bulk/BulkConfig.scala | 10 +- .../cmap/cnam/study/bulk/BulkMain.scala | 204 ++---------------- .../bulk/extractors/DcirSourceExtractor.scala | 41 ++++ .../bulk/extractors/HadSourceExtractor.scala | 30 +++ .../bulk/extractors/ImbSourceExtractor.scala | 16 ++ .../extractors/McoCeSourceExtractor.scala | 40 ++++ .../bulk/extractors/McoSourceExtractor.scala | 27 +++ .../bulk/extractors/PatientExtractor.scala | 27 +++ .../PmsiHospitalStaysExtractor.scala | 14 -- .../bulk/extractors/SourceExtractor.scala | 80 +++++++ .../extractors/SsrCeSourceExtractor.scala | 16 ++ .../bulk/extractors/SsrSourceExtractor.scala | 24 +++ .../McoHospitalStayExtractorSuite.scala | 2 +- .../ngapacts/DcirNgapActsExtractorSuite.scala | 7 +- .../ngapacts/McoNgapActsExtractorSuite.scala | 2 +- .../DrugPrescriptionTransformerSuite.scala | 2 +- 25 files changed, 463 insertions(+), 331 deletions(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractor.scala create mode 
100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PatientExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PmsiHospitalStaysExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractor.scala diff --git a/src/main/resources/config/bulk/paths/cmap.conf b/src/main/resources/config/bulk/paths/cmap.conf index 295b2c5f..4e84e3d9 100644 --- a/src/main/resources/config/bulk/paths/cmap.conf +++ b/src/main/resources/config/bulk/paths/cmap.conf @@ -1,10 +1,10 @@ env_name = "cmap" input = { - dcir = "/shared/Observapur/staging/Flattening/flat_table/DCIR" - mco_ce = "/shared/Observapur/staging/Flattening/flat_table/MCO_CE" - mco = "/shared/Observapur/staging/Flattening/flat_table/MCO" - ir_ben = "/shared/Observapur/staging/Flattening/single_table/IR_BEN_R" + dcir = "/user/ds/CNAM447/flattening/flat_table/DCIR" + mco_ce = "/user/ds/CNAM447bis/flattening/flat_table/MCO_CE" + mco = "/user/ds/CNAM447bis/flattening/flat_table/MCO" + ir_ben = "/user/ds/CNAM447/flattening/single_table/IR_BEN_R" ir_imb = "/shared/Observapur/staging/Flattening/single_table/IR_IMB_R" ir_pha = "/shared/Observapur/staging/Flattening/single_table/IR_PHA_R_MOL" } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala index 7abf6609..553c9458 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala @@ -3,10 +3,17 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts +import org.apache.spark.sql.DataFrame 
+import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCEAct, MedicalAct} import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources object McoCeActExtractor extends McoCeExtractor[MedicalAct] { val columnName: String = ColNames.CamCode override val eventBuilder: EventBuilder = McoCEAct + + override def getInput(sources: Sources): DataFrame = { + sources.mcoCe.get.select((ColNames.CamCode :: ColNames.core).map(col): _*) + } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala index 38dafe36..03e53530 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala @@ -15,8 +15,6 @@ trait McoCeExtractor[EventType <: AnyEvent] extends Extractor[EventType] with Mc val eventBuilder: EventBuilder - def getInput(sources: Sources): DataFrame = sources.mcoCe.get.select(ColNames.all.map(col): _*) - def isInStudy(codes: Set[String]) (row: Row): Boolean = codes.exists(code(row).startsWith(_)) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala index cee6a3d5..4bb1e0b3 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala @@ -9,24 +9,32 @@ import fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp trait McoCeSource extends ColumnNames { final object ColNames extends Serializable { + // Essential for all the Extractors val PatientID: ColName = "NUM_ENQ" val EtaNum: ColName = "ETA_NUM" val SeqNum : ColName = "SEQ_NUM" - val CamCode = "MCO_FMSTC__CCAM_COD" 
val Date = "EXE_SOI_DTD" + val Year = "year" + + // For the Act extractor + val CamCode = "MCO_FMSTC__CCAM_COD" + + // NGAP from FBSTC val NgapKeyLetterFbstc = "MCO_FBSTC__ACT_COD" val NgapCoefficientFbstc = "MCO_FBSTC__ACT_COE" + + // Practionner from FBSTC val PractitionnerSpecialtyFbstc = "MCO_FBSTC__EXE_SPE" + + // NGAP for FSCTC val NgapKeyLetterFcstc = "MCO_FCSTC__ACT_COD" val NgapCoefficientFcstc = "MCO_FCSTC__ACT_COE" + + // Practionner from FCSTC val PractitionnerSpecialtyFcstc = "MCO_FCSTC__EXE_SPE" - val Year = "year" - val all = List( - PatientID, EtaNum, SeqNum, Year, CamCode, Date, - NgapKeyLetterFbstc, NgapCoefficientFbstc, PractitionnerSpecialtyFbstc, - NgapKeyLetterFcstc, NgapCoefficientFcstc, PractitionnerSpecialtyFcstc - ) + val core = List(PatientID, EtaNum, SeqNum, Date, Year) + } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala index a13a0412..133d140b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala @@ -1,8 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts import scala.reflect.runtime.universe._ -import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} import org.apache.spark.sql.functions.col +import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} import fr.polytechnique.cmap.cnam.etl.events.{DcirNgapAct, Event, EventBuilder, NgapAct} import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources @@ -12,45 +12,19 @@ class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[ private final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) override val columnName: String = ColNames.NaturePrestation - val ngapKeyLetter: String = 
"PRS_NAT_CB2" override val eventBuilder: EventBuilder = DcirNgapAct - - override def getInput(sources: Sources): DataFrame = { - - val neededColumns: List[Column] = List( - ColNames.PatientID, ColNames.NaturePrestation, ColNames.NgapCoefficient, - ColNames.Date, ColNames.ExecPSNum, ColNames.DcirFluxDate, ngapKeyLetter, - ColNames.Sector, ColNames.GHSCode, ColNames.InstitutionCode - ).map(colName => col(colName)) - - lazy val irNat = sources.irNat.get - lazy val dcir = sources.dcir.get - - lazy val df: DataFrame = dcir.join(irNat, dcir("PRS_NAT_REF").cast("String") === irNat("PRS_NAT")) - df.select(neededColumns: _*) - } - - override def isInExtractorScope(row: Row): Boolean = { - !row.isNullAt(row.fieldIndex(ngapKeyLetter)) - } - - override def isInStudy(codes: Set[String])(row: Row): Boolean = { - dcirIsInCategory( - ngapActsConfig.actsCategories, - row - ) - } + val ngapKeyLetter: String = "PRS_NAT_CB2" /** - * We extract Ngap acts as a concatenation of three different ways to identify specific ngap acts in the SNDS : - * - prestation type (ngapPrsNatRefs: PRS_NAT_REF), - * - prestation coefficient (ngapKeyLetters : PRS_NAT_CB2 or ACT_COD in the PMSI_CE), - * - prestation coefficient (ngapCoefficients: PRS_ACT_CFT or ACT_COE in the PMSI_CE) - * - * For more information, Cf NgapActConfig documentation. - * - * @return concatenation of the three codes - */ + * We extract Ngap acts as a concatenation of three different ways to identify specific ngap acts in the SNDS : + * - prestation type (ngapPrsNatRefs: PRS_NAT_REF), + * - prestation coefficient (ngapKeyLetters : PRS_NAT_CB2 or ACT_COD in the PMSI_CE), + * - prestation coefficient (ngapCoefficients: PRS_ACT_CFT or ACT_COE in the PMSI_CE) + * + * For more information, Cf NgapActConfig documentation. 
+ * + * @return concatenation of the three codes + */ override def code: Row => String = (row: Row) => { row.getAs[Int](ColNames.NaturePrestation).toString + "_" + row.getAs[String](ngapKeyLetter) + "_" + @@ -107,21 +81,48 @@ class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[ }.flatMap(builder _).distinct() } + override def getInput(sources: Sources): DataFrame = { + + val neededColumns: List[Column] = List( + ColNames.PatientID, ColNames.NaturePrestation, ColNames.NgapCoefficient, + ColNames.Date, ColNames.ExecPSNum, ColNames.DcirFluxDate, ngapKeyLetter, + ColNames.Sector, ColNames.GHSCode, ColNames.InstitutionCode + ).map(colName => col(colName)) + + lazy val irNat = sources.irNat.get + lazy val dcir = sources.dcir.get + + lazy val df: DataFrame = dcir.join(irNat, dcir("PRS_NAT_REF").cast("String") === irNat("PRS_NAT")) + df.select(neededColumns: _*) + } + + override def isInExtractorScope(row: Row): Boolean = { + !row.isNullAt(row.fieldIndex(ngapKeyLetter)) + } + + override def isInStudy(codes: Set[String])(row: Row): Boolean = { + dcirIsInCategory( + ngapActsConfig.actsCategories, + row + ) + } + def dcirIsInCategory( - categories: List[NgapActClassConfig], - row: Row): Boolean = { - - val ngapKeyLetter : String = row.getAs[String]("PRS_NAT_CB2") - val ngapCoefficient : String = row.getAs[Double]("PRS_ACT_CFT").toString - val prsNatRef: String = row.getAs[Int]("PRS_NAT_REF").toString - - categories - .exists(category => - ( - category.ngapKeyLetters.contains(ngapKeyLetter) && - category.ngapCoefficients.contains(ngapCoefficient) - ) || - category.ngapPrsNatRefs.contains(prsNatRef) - ) - } + categories: List[NgapActClassConfig], + row: Row): Boolean = { + + val ngapKeyLetter: String = row.getAs[String]("PRS_NAT_CB2") + val ngapCoefficient: String = row.getAs[Double]("PRS_ACT_CFT").toString + val prsNatRef: String = row.getAs[Int]("PRS_NAT_REF").toString + + categories + .exists( + category => + ( + 
category.ngapKeyLetters.contains(ngapKeyLetter) && + category.ngapCoefficients.contains(ngapCoefficient) + ) || + category.ngapPrsNatRefs.contains(prsNatRef) + ) + } } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala index 9c86488f..b323f6e8 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala @@ -2,8 +2,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts import scala.reflect.runtime.universe._ import scala.util.Try +import org.apache.spark.sql.functions.col import org.apache.spark.sql.{DataFrame, Dataset, Row} -import fr.polytechnique.cmap.cnam.etl.events.{Event, EventBuilder, McoCeFbstcNgapAct, McoCeFcstcNgapAct, NgapAct} +import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources @@ -14,15 +15,6 @@ trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { val columnName: String = keyLetterColumn - override def isInStudy(codes: Set[String])(row: Row): Boolean = { - pmsiIsInCategories( - ngapActsConfig.actsCategories, - keyLetterColumn, - coeffColumn, - row - ) - } - override def code: Row => String = (row: Row) => { val coeff = Try(row.getAs[Double](coeffColumn).toString) recover { case _: NullPointerException => "0" @@ -46,36 +38,32 @@ trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { else { input.filter(isInExtractorScope _).filter(isInStudy(codes) _) } - }.flatMap(builder _).distinct() + }.flatMap(builder _).distinct() } - def pmsiIsInCategory( - category: NgapActClassConfig, - ngapLetter: String, - ngapCoeff: String): Boolean = { - if (category.ngapCoefficients.isEmpty) { - 
category.ngapKeyLetters.contains(ngapLetter) - } - else { - category.ngapCoefficients.contains(ngapCoeff) && - category.ngapKeyLetters.contains(ngapLetter) - } + override def isInStudy(codes: Set[String])(row: Row): Boolean = { + pmsiIsInCategories( + ngapActsConfig.actsCategories, + keyLetterColumn, + coeffColumn, + row + ) } /** User could be interested by different Ngap categories each defined by a list of key letters - * and a list of coefficients. This function iterates over each category. More détails in the NgapActConfig class. - * - * @param categories : A list of Ngap prestation and coefficient codes - * @param ngapKeyColumn : the Ngap prestation code for MCO CE - * @param ngapCoeffColumn : the Ngap coefficient which complete the prestation code for MCO CE - * @param row - * @return - */ + * and a list of coefficients. This function iterates over each category. More détails in the NgapActConfig class. + * + * @param categories : A list of Ngap prestation and coefficient codes + * @param ngapKeyColumn : the Ngap prestation code for MCO CE + * @param ngapCoeffColumn : the Ngap coefficient which complete the prestation code for MCO CE + * @param row + * @return + */ def pmsiIsInCategories( - categories: List[NgapActClassConfig], - ngapKeyColumn: String, - ngapCoeffColumn: String, - row: Row): Boolean = { + categories: List[NgapActClassConfig], + ngapKeyColumn: String, + ngapCoeffColumn: String, + row: Row): Boolean = { val letter = row.getAs[String](ngapKeyColumn) val coeff = Try(row.getAs[Double](ngapCoeffColumn).toString) recover { @@ -85,20 +73,37 @@ trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { categories .exists(category => pmsiIsInCategory(category, letter, coeff.get)) } + + def pmsiIsInCategory( + category: NgapActClassConfig, + ngapLetter: String, + ngapCoeff: String): Boolean = { + if (category.ngapCoefficients.isEmpty) { + category.ngapKeyLetters.contains(ngapLetter) + } + else { + category.ngapCoefficients.contains(ngapCoeff) && + 
category.ngapKeyLetters.contains(ngapLetter) + } + } + + override def getInput(sources: Sources): DataFrame = { + sources.mcoCe.get.select((coeffColumn :: keyLetterColumn :: ColNames.core).map(col): _*) + } } class McoCeFbstcNgapActExtractor(ngapConfig: NgapActConfig) extends McoCeNgapActExtractor { - val ngapActsConfig: NgapActConfig = ngapConfig val keyLetterColumn: String = ColNames.NgapKeyLetterFbstc - val coeffColumn: String = ColNames.NgapCoefficientFbstc override val columnName: String = keyLetterColumn override val eventBuilder: EventBuilder = McoCeFbstcNgapAct + val ngapActsConfig: NgapActConfig = ngapConfig + val coeffColumn: String = ColNames.NgapCoefficientFbstc } class McoCeFcstcNgapActExtractor(ngapConfig: NgapActConfig) extends McoCeNgapActExtractor { - val ngapActsConfig: NgapActConfig = ngapConfig val keyLetterColumn: String = ColNames.NgapKeyLetterFcstc - val coeffColumn: String = ColNames.NgapCoefficientFcstc override val columnName: String = keyLetterColumn override val eventBuilder: EventBuilder = McoCeFcstcNgapAct + val ngapActsConfig: NgapActConfig = ngapConfig + val coeffColumn: String = ColNames.NgapCoefficientFcstc } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala index 3f7949a7..0bfe63dc 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala @@ -1,12 +1,12 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts trait NgapActClassConfig extends Serializable { - """ + """ ngapCoefficients should always be specified with the dot separation for float, as this is how they are coded in the snds. 
eg: "2.0" should be used instead of "2" """.stripMargin - //val name: String - val ngapKeyLetters: Seq[String] - val ngapCoefficients: Seq[String] - val ngapPrsNatRefs: Seq[String] = Seq() + //val name: String + val ngapKeyLetters: Seq[String] + val ngapCoefficients: Seq[String] + val ngapPrsNatRefs: Seq[String] = Seq() } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala index 6df9922c..b566999d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala @@ -1,7 +1,5 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts -import scala.util.Try -import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig /** @@ -21,6 +19,7 @@ import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig * search where ngapCoefficient is available * - if a list of ngapKeyLetters and a list of ngapCoefficients is given, it extracts all combination of (keyLetter, coefficient) * - if the list of ngapCoefficients is empty, extract all acts where coeff is in ngapCoefficient + * * @param actsCategories List of configuration to get specific NgapActs */ class NgapActConfig( @@ -28,7 +27,7 @@ class NgapActConfig( } object NgapActConfig { - def apply(actsCategories: List[NgapActClassConfig]): NgapActConfig= new NgapActConfig( + def apply(actsCategories: List[NgapActClassConfig]): NgapActConfig = new NgapActConfig( actsCategories ) } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala index 95dab891..bddbf5a1 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala @@ -3,13 +3,13 @@ package fr.polytechnique.cmap.cnam.etl.extractors.prestations import java.sql.Timestamp - import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor -import org.apache.spark.sql.Row - +import org.apache.spark.sql.{DataFrame, Row} import scala.util.Try +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.sources.Sources /** * Get specialties of medical practitionner in the Dcir: @@ -88,6 +88,10 @@ trait McoCeSpecialtyExtractor extends McoCeExtractor[PractitionerClaimSpeciality override def isInExtractorScope(row: Row): Boolean = { (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) } + + override def getInput(sources: Sources): DataFrame = { + sources.mcoCe.get.select((columnName :: ColNames.core).map(col): _*) + } } object McoCeFbstcSpecialtyExtractor extends McoCeSpecialtyExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala index fab6df79..edaaac65 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala @@ -24,15 +24,15 @@ object BulkConfig extends BulkConfigLoader { loadConfigWithDefaults[BulkConfig](path, defaultPath, env) } + final case class DrugsConfig( + override val level: DrugClassificationLevel = Cip13Level, + override val families: List[DrugClassConfig] = List.empty + ) extends DrugConfig(level = level, families = families) + final object BaseConfig extends BaseConfig( ageReferenceDate = LocalDate.of(2011, 1, 1), 
studyStart = LocalDate.of(2010, 1, 1), studyEnd = LocalDate.of(2015, 1, 1) ) - final case class DrugsConfig( - override val level: DrugClassificationLevel = Cip13Level, - override val families: List[DrugClassConfig] = List.empty - ) extends DrugConfig(level = level, families = families) - } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala index 9eee1709..d2ba80b8 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala @@ -3,19 +3,12 @@ package fr.polytechnique.cmap.cnam.study.bulk import java.io.PrintWriter -import scala.collection.mutable import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main -import fr.polytechnique.cmap.cnam.etl.extractors.acts.{DcirMedicalActExtractor, McoCcamActExtractor, McoCeActExtractor, McoCimMedicalActExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.classifications.GhmExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.study.bulk.extractors.PmsiHospitalStaysExtractor -import fr.polytechnique.cmap.cnam.util.Path -import fr.polytechnique.cmap.cnam.util.reporting.{MainMetadata, OperationMetadata, OperationReporter, OperationTypes} +import fr.polytechnique.cmap.cnam.study.bulk.extractors._ +import fr.polytechnique.cmap.cnam.util.reporting.MainMetadata object BulkMain extends Main { override def appName: String = "BulkMain" @@ -24,7 +17,6 @@ object BulkMain extends Main { sqlContext: SQLContext, argsMap: Map[String, String]): Option[Dataset[_]] = { - val format = new 
java.text.SimpleDateFormat("yyyy_MM_dd_HH_mm_ss") val startTimestamp = new java.util.Date() val bulkConfig = BulkConfig.load(argsMap("conf"), argsMap("env")) @@ -32,187 +24,21 @@ object BulkMain extends Main { import implicits.SourceReader val sources = Sources.sanitize(sqlContext.readSources(bulkConfig.input)) - val operationsMetadata = mutable.Buffer[OperationMetadata]() - - val drugs = new DrugExtractor(bulkConfig.drugs).extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "DrugPurchases", - List("DCIR"), - OperationTypes.Dispensations, - drugs.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - - drugs.unpersist() - - val hospitalStays = PmsiHospitalStaysExtractor.extract(sources).cache() - operationsMetadata += { - OperationReporter - .report( - "HospitalStays", - List("MCO"), - OperationTypes.HospitalStays, - hospitalStays.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - - val dcirMedicalAct = DcirMedicalActExtractor.extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "DCIRMedicalAct", - List("DCIR"), - OperationTypes.MedicalActs, - dcirMedicalAct.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - - dcirMedicalAct.unpersist() - - - val cimMedicalAct = McoCimMedicalActExtractor.extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "CIM-Medical-Acts", - List("MCO"), - OperationTypes.MedicalActs, - cimMedicalAct.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - - cimMedicalAct.unpersist() - - - val ccamMedicalAct = McoCcamActExtractor.extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "CCAM-Medical-Acts", - List("MCO"), - OperationTypes.MedicalActs, - ccamMedicalAct.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - - 
ccamMedicalAct.unpersist() - - - val liberalActs = McoCeActExtractor.extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "McoCEMedicalActs", - List("MCO_ACE"), - OperationTypes.MedicalActs, - liberalActs.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - - liberalActs.unpersist() - - val imbActs = ImbDiagnosisExtractor.extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "ImbDiagnoses", - List("IR_IMB_R"), - OperationTypes.MedicalActs, - imbActs.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - - imbActs.unpersist() - - val classification = GhmExtractor.extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "GHM", - List("MCO"), - OperationTypes.AnyEvents, - classification.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - - classification.unpersist() - - val mainDiag = McoMainDiagnosisExtractor.extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "MainDiagnosis", - List("MCO"), - OperationTypes.Diagnosis, - mainDiag.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - mainDiag.unpersist() - - val linkedDiag = McoLinkedDiagnosisExtractor.extract(sources, Set.empty).cache() - - operationsMetadata += { - OperationReporter.report( - "LinkedDiagnosis", - List("MCO"), - OperationTypes.Diagnosis, - linkedDiag.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - linkedDiag.unpersist() - - val associatedDiag = McoAssociatedDiagnosisExtractor.extract(sources, Set.empty).cache() - operationsMetadata += { - OperationReporter.report( - "AssociatedDiagnosis", - List("MCO"), - OperationTypes.Diagnosis, - associatedDiag.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - associatedDiag.unpersist() - - - val 
patients = new Patients(PatientsConfig(bulkConfig.base.studyStart)).extract(sources).cache() - operationsMetadata += { - OperationReporter.report( - "BasePopulation", - List("IR_BEN", "DCIR", "MCO", "MCO_CE"), - OperationTypes.Patients, - patients.toDF, - Path(bulkConfig.output.outputSavePath), - bulkConfig.output.saveMode - ) - } - patients.unpersist() - + val sourceExtractor: List[SourceExtractor] = List( + new DcirSourceExtractor(bulkConfig.output.root, bulkConfig.output.saveMode, bulkConfig.drugs), + new McoSourceExtractor(bulkConfig.output.root, bulkConfig.output.saveMode), + new McoCeSourceExtractor(bulkConfig.output.root, bulkConfig.output.saveMode), + new SsrSourceExtractor(bulkConfig.output.root, bulkConfig.output.saveMode), + new SsrCeSourceExtractor(bulkConfig.output.root, bulkConfig.output.saveMode), + new HadSourceExtractor(bulkConfig.output.root, bulkConfig.output.saveMode) + ) // Write Metadata - val metadata = MainMetadata(this.getClass.getName, startTimestamp, new java.util.Date(), operationsMetadata.toList) + val metadata = MainMetadata( + this.getClass.getName, startTimestamp, new java.util.Date(), + sourceExtractor.map(se => se.extract(sources)).flatten ++ + new PatientExtractor(bulkConfig.output.root, bulkConfig.output.saveMode, bulkConfig.base).extract(sources) + ) val metadataJson: String = metadata.toJsonString() new PrintWriter("metadata_bulk_" + format.format(startTimestamp) + ".json") { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractor.scala new file mode 100644 index 00000000..768cb08d --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractor.scala @@ -0,0 +1,41 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.events.{Drug, MedicalAct, NgapAct, PractitionerClaimSpeciality} 
+import fr.polytechnique.cmap.cnam.etl.extractors.acts.{DcirBiologyActExtractor, DcirMedicalActExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.ngapacts.{DcirNgapActExtractor, NgapActConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.prestations.{MedicalPractitionerClaimExtractor, NonMedicalPractitionerClaimExtractor} + +class DcirSourceExtractor( + override val path: String, + override val saveMode: String, + val drugConfig: DrugConfig) extends SourceExtractor(path, saveMode) { + override val sourceName: String = "DCIR" + + override val extractors = List( + ExtractorSources[MedicalAct](DcirMedicalActExtractor, List("ER_PRS_F", "ER_CAM_F", "ER_ETE_F"), "DCIR_MEDICAL_ACT"), + ExtractorSources[MedicalAct]( + DcirBiologyActExtractor, + List("ER_PRS_F", "ER_BIO_F", "ER_ETE_F"), + "DCIR_BIOLOGICAL_ACT" + ), + ExtractorSources[Drug](new DrugExtractor(drugConfig), List("ER_PRS_F", "IR_PHA_R"), "DRUG_PURCHASES"), + ExtractorSources[NgapAct]( + new DcirNgapActExtractor(NgapActConfig(List.empty)), + List("ER_PRS_F", "IR_NAT_V", "ER_ETE_F"), + "DCIR_NGAP_ACTS" + ), + ExtractorSources[PractitionerClaimSpeciality]( + MedicalPractitionerClaimExtractor, + List("ER_PRS_F"), + "DCIR_MEDICAL_PRACTIONNER" + ), + ExtractorSources[PractitionerClaimSpeciality]( + NonMedicalPractitionerClaimExtractor, + List("ER_PRS_F"), + "DCIR_NON_MEDICAL_PRACTIONNER" + ) + ) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala new file mode 100644 index 00000000..bd3955b4 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala @@ -0,0 +1,30 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.events._ +import 
fr.polytechnique.cmap.cnam.etl.extractors.acts.HadCcamActExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.takeOverReasons.{HadAssociatedTakeOverExtractor, HadMainTakeOverExtractor} + +class HadSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( + path, + saveMode +) { + override val sourceName: String = "HAD" + override val extractors = List( + ExtractorSources[MedicalAct](HadCcamActExtractor, List("HAD_C", "HAD_A"), "HAD_CCAM_ACT"), + ExtractorSources[Diagnosis](HadMainDiagnosisExtractor, List("HAD_C", "HAD_B"), "HAD_MAIN_DIAGNOSIS"), + ExtractorSources[Diagnosis](HadAssociatedDiagnosisExtractor, List("HAD_C", "HAD_D"), "HAD_ASSOCIATED_DIAGNOSIS"), + ExtractorSources[MedicalTakeOverReason]( + HadMainTakeOverExtractor, + List("HAD_C", "HAD_B"), + "HAD_MAIN_TAKE_OVER_REASON" + ), + ExtractorSources[MedicalTakeOverReason]( + HadAssociatedTakeOverExtractor, + List("HAD_C", "HAD_B"), + "HAD_ASSOCIATED_TAKE_OVER_REASON" + ) + ) +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractor.scala new file mode 100644 index 00000000..c333121e --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractor.scala @@ -0,0 +1,16 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.events.Diagnosis +import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.ImbDiagnosisExtractor + +class ImbSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( + path, + saveMode +) { + override val sourceName: String = "IMB_R" + override val extractors = List( + ExtractorSources[Diagnosis](ImbDiagnosisExtractor, List("IR_IMB_R"), "ALD") + ) +} diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractor.scala new file mode 100644 index 00000000..b58fa475 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractor.scala @@ -0,0 +1,40 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.extractors.acts.McoCeActExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoceEmergenciesExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.ngapacts.{McoCeFbstcNgapActExtractor, McoCeFcstcNgapActExtractor, NgapActConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.prestations.{McoCeFbstcSpecialtyExtractor, McoCeFcstcSpecialtyExtractor} + +class McoCeSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( + path, + saveMode +) { + override val sourceName: String = "MCO_CE" + override val extractors = List( + ExtractorSources[MedicalAct](McoCeActExtractor, List("MCO_CSTC", "MCO_FMSTC"), "MCO_CE_CCAM_ACTS"), + ExtractorSources[NgapAct]( + new McoCeFbstcNgapActExtractor(NgapActConfig(List.empty)), + List("MCO_CSTC", "MCO_FBSTC"), + "MCO_CE_FBSTC_NGAP_ACTS" + ), + ExtractorSources[NgapAct]( + new McoCeFcstcNgapActExtractor(NgapActConfig(List.empty)), + List("MCO_CSTC", "MCO_FCSTC"), + "MCO_CE_FCSTC_NGAP_ACTS" + ), + ExtractorSources[PractitionerClaimSpeciality]( + McoCeFbstcSpecialtyExtractor, + List("MCO_CSTC", "MCO_FBSTC"), + "MCO_CE_FBSTC_PRACTITIONER_SPECIALITY" + ), + ExtractorSources[PractitionerClaimSpeciality]( + McoCeFcstcSpecialtyExtractor, + List("MCO_CSTC", "MCO_FCSTC"), + "MCO_CE_FCSTC_PRACTITIONER_SPECIALITY" + ), + ExtractorSources[HospitalStay](McoceEmergenciesExtractor, List("MCO_CSTC", "MCO_FBSTC"), "MCO_CE_EMERGENCY_VISIT") + ) +} diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractor.scala new file mode 100644 index 00000000..59f2a8f9 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractor.scala @@ -0,0 +1,27 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.extractors.acts.McoCcamActExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{McoAssociatedDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor + +class McoSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( + path, + saveMode +) { + override val sourceName: String = "MCO" + override val extractors = List( + ExtractorSources[MedicalAct](McoCcamActExtractor, List("MCO_C", "MCO_A"), "MCO_CCAM_ACT"), + ExtractorSources[Diagnosis](McoMainDiagnosisExtractor, List("MCO_C", "MCO_B"), "MCO_MAIN_DIAGNOSIS"), + ExtractorSources[Diagnosis]( + McoAssociatedDiagnosisExtractor, + List("MCO_C", "MCO_B", "MCO_D"), + "MCO_ASSOCIATED_DIAGNOSIS" + ), + ExtractorSources[Diagnosis](McoLinkedDiagnosisExtractor, List("MCO_C", "MCO_B"), "MCO_LINKED_DIAGNOSIS"), + ExtractorSources[HospitalStay](McoHospitalStaysExtractor, List("MCO_C", "MCO_B"), "MCO_HOSPITAL_STAY") + ) + +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PatientExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PatientExtractor.scala new file mode 100644 index 00000000..f753426d --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PatientExtractor.scala @@ -0,0 +1,27 @@ +// License: BSD 3 clause + +package 
fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.config.BaseConfig +import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.Path +import fr.polytechnique.cmap.cnam.util.reporting.{OperationMetadata, OperationReporter, OperationTypes} + +class PatientExtractor(val path: String, val saveMode: String, val baseConfig: BaseConfig) { + def extract(sources: Sources): List[OperationMetadata] = { + val patients = new Patients(PatientsConfig(baseConfig.studyStart)).extract(sources) + List( + OperationReporter + .report( + "all_patients", + List("DCIR", "MCO", "IR_BEN_R", "MCO_CE"), + OperationTypes.Patients, + patients.toDF, + Path(path), + saveMode + ) + ) + + } +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PmsiHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PmsiHospitalStaysExtractor.scala deleted file mode 100644 index 496645db..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PmsiHospitalStaysExtractor.scala +++ /dev/null @@ -1,14 +0,0 @@ -package fr.polytechnique.cmap.cnam.study.bulk.extractors - -import org.apache.spark.sql.Dataset -import fr.polytechnique.cmap.cnam.etl.events.{Event, HospitalStay} -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.{McoHospitalStaysExtractor, McoceEmergenciesExtractor} -import fr.polytechnique.cmap.cnam.etl.sources.Sources - -object PmsiHospitalStaysExtractor { - def extract(sources: Sources): Dataset[Event[HospitalStay]] = { - val mco = McoHospitalStaysExtractor.extract(sources, Set.empty[String]) - val mcoce = McoceEmergenciesExtractor.extract(sources, Set.empty[String]) - mco.union(mcoce) - } -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractor.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractor.scala new file mode 100644 index 00000000..4181f22a --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractor.scala @@ -0,0 +1,80 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import scala.reflect.runtime.universe._ +import scala.util.{Failure, Success, Try} +import org.apache.log4j.Logger +import org.apache.spark.sql.Dataset +import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.extractors.Extractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.Path +import fr.polytechnique.cmap.cnam.util.reporting.{OperationMetadata, OperationReporter, OperationTypes} + +/** + * Extract all available Events from the given source. + * + * This regroups all the available Extractors for a given source and executes them on the Source. If the running passes, + * then the result is stored in the given path and saved in the OperationMetadata. If the running fails, then the + * logger warns the user that the running failed, indicating the missing tables that must be flattened. + * + * Every implementation of this abstract class must be updated whenever a new Extractor that works on the given Source is + * added. + */ +abstract class SourceExtractor(val path: String, val saveMode: String) { + val sourceName: String + + // This is the ugliest bit of this implementation, and there is no way of getting around it because of Spark. + // First, Spark Dataset is invariant hence no way of making the Extractor trait covariant to avoid this ugly upper + // bounding. + // Second, TypeTag is needed for the Spark encoder for case class, hence the explicit typing instead of AnyEvent.
+ // @TODO: Every time you add a new Event type you will need to add it in the "with" clause + val extractors: List[ExtractorSources[_ >: MedicalAct with HospitalStay with Diagnosis with Drug + with MedicalTakeOverReason with NgapAct with PractitionerClaimSpeciality <: AnyEvent with EventBuilder]] + private val logger = Logger.getLogger(this.getClass) + + /** + * Extracts all Events from the Source and returns a List of OperationMetadata. + * + * @param sources Sources object containing the sources. + * @return OperationMetadata containing all Events extracted. + */ + def extract(sources: Sources): List[OperationMetadata] = extractors.flatMap(es => runAndReport(sources)(es)) + + def runAndReport[A <: AnyEvent : TypeTag](sources: Sources)(es: ExtractorSources[A]): Option[OperationMetadata] = + run(es.extractor, sources) match { + case Success(tde) => Some(report(es, tde)) + case Failure(error) => { // a failing extractor yields None so the remaining extractors still run + logger.warn( + "Extractor " + es + .extractor + " failed, probably you didn't flatten all the following tables: " + es.sources, error + ) + None + } + } + + def run[A <: AnyEvent : TypeTag](extractor: Extractor[A], sources: Sources): Try[Dataset[Event[A]]] = { + Try { + extractor.extract(sources, Set.empty)(typeTag[A]) + } + } + + def report[A <: AnyEvent : TypeTag]( + extractorSources: ExtractorSources[A], + result: Dataset[Event[A]]): OperationMetadata = OperationReporter + .report( + extractorSources.name, + extractorSources.sources, + OperationTypes.AnyEvents, + result.toDF, + Path(path), + saveMode + ) +} + + +case class ExtractorSources[EventType <: AnyEvent : TypeTag]( + extractor: Extractor[EventType], + sources: List[String], + name: String) \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractor.scala new file mode 100644 index 00000000..8da68052 --- /dev/null +++
b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractor.scala @@ -0,0 +1,16 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.events.MedicalAct +import fr.polytechnique.cmap.cnam.etl.extractors.acts.SsrCeActExtractor + +class SsrCeSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( + path, + saveMode +) { + override val sourceName: String = "SSR_CE" + override val extractors = List( + ExtractorSources[MedicalAct](SsrCeActExtractor, List("SSR_CSTC", "SSR_FMSTC"), "SSR_CE_CCAM") + ) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractor.scala new file mode 100644 index 00000000..dfbe8777 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractor.scala @@ -0,0 +1,24 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, HospitalStay, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.acts.{SsrCcamActExtractor, SsrCsarrActExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.SsrHospitalStaysExtractor + +class SsrSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( + path, + saveMode +) { + override val sourceName: String = "SSR" + override val extractors = List( + ExtractorSources[MedicalAct](SsrCcamActExtractor, List("SSR_C", "SSR_CCAM"), "SSR_CCAM"), + ExtractorSources[MedicalAct](SsrCsarrActExtractor, List("SSR_C", "SSR_CSARR"), "SSR_CSARR"), + ExtractorSources[Diagnosis](SsrMainDiagnosisExtractor, List("SSR_C", "SSR_B"), "SSR_MAIN_DIAGNOSIS"), + ExtractorSources[Diagnosis](SsrLinkedDiagnosisExtractor, 
List("SSR_C", "SSR_B"), "SSR_LINKED_DIAGNOSIS"), + ExtractorSources[Diagnosis](SsrAssociatedDiagnosisExtractor, List("SSR_C", "SSR_D"), "SSR_ASSOCIATED_DIAGNOSIS"), + ExtractorSources[Diagnosis](SsrTakingOverPurposeExtractor, List("SSR_C", "SSR_B"), "SSR_TAKE_OVER_REASON"), + ExtractorSources[HospitalStay](SsrHospitalStaysExtractor, List("SSR_C", "SSR_B"), "SSR_STAY") + ) +} \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala index 83a052ef..508fa0b8 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala @@ -85,7 +85,7 @@ class McoHospitalStayExtractorSuite extends SharedContext { val result: Dataset[Event[HospitalStay]] = McoHospitalStaysExtractor.extract(sources, Set.empty[String]) //Then - assertDSs(expected, result, true) + assertDSs(expected, result) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala index 84b3af13..6e37d613 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala @@ -2,10 +2,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts -import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext import 
fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.sources.Sources @@ -14,19 +11,19 @@ import fr.polytechnique.cmap.cnam.util.functions.makeTS class DcirNgapActsExtractorSuite extends SharedContext { object ngapClassKeyLetterCoefficient extends NgapActClassConfig { + override val ngapPrsNatRefs: Seq[String] = Seq() val ngapKeyLetters: Seq[String] = Seq("C") val ngapCoefficients: Seq[String] = Seq( "0.42" ) - override val ngapPrsNatRefs: Seq[String] = Seq() } object ngapPrsNatRef extends NgapActClassConfig { + override val ngapPrsNatRefs: Seq[String] = Seq("1111") val ngapKeyLetters: Seq[String] = Seq("D") val ngapCoefficients: Seq[String] = Seq( "0.45" ) - override val ngapPrsNatRefs: Seq[String] = Seq("1111") } "extract" should "extract ngap acts events from raw data with a ngapClass based on key letter B2 and coefficient" in { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala index 52adf8a2..ccfb0f30 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala @@ -1,10 +1,10 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts +import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS -import org.apache.spark.sql.DataFrame class McoNgapActsExtractorSuite extends SharedContext { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformerSuite.scala index 
cf809b93..5b325605 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformerSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/drugprescription/DrugPrescriptionTransformerSuite.scala @@ -33,7 +33,7 @@ class DrugPrescriptionTransformerSuite extends SharedContext { val result = transformer.transform(input) - assertDSs(expected.as[Event[Drug]], result.as[Event[Drug]], true) + assertDSs(expected.as[Event[Drug]], result.as[Event[Drug]]) } From f0490e1da1fbee8d32595254791e8248b0de3e7f Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Mon, 6 Apr 2020 14:46:49 +0200 Subject: [PATCH 25/38] CNAM-450: Add tests. --- .../bulk/extractors/HadSourceExtractor.scala | 10 ++- .../extractors/DcirSourceExtractorSuite.scala | 26 ++++++ .../extractors/HadSourceExtractorSuite.scala | 22 +++++ .../extractors/ImbSourceExtractorSuite.scala | 22 +++++ .../McoCeSourceExtractorSuite.scala | 22 +++++ .../extractors/McoSourceExtractorSuite.scala | 22 +++++ .../extractors/SourceExtractorSuite.scala | 86 +++++++++++++++++++ .../SsrCeSourceExtractorSuite.scala | 33 +++++++ .../extractors/SsrSourceExtractorSuite.scala | 22 +++++ 9 files changed, 263 insertions(+), 2 deletions(-) create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractorSuite.scala create mode 100644 
src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractorSuite.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala index bd3955b4..7f21a231 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala @@ -2,9 +2,10 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors -import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, HospitalStay, MedicalAct, MedicalTakeOverReason} import fr.polytechnique.cmap.cnam.etl.extractors.acts.HadCcamActExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{HadAssociatedDiagnosisExtractor, HadMainDiagnosisExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.HadHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.takeOverReasons.{HadAssociatedTakeOverExtractor, HadMainTakeOverExtractor} class HadSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( @@ -25,6 +26,11 @@ class HadSourceExtractor(override val path: String, override val saveMode: Strin HadAssociatedTakeOverExtractor, List("HAD_C", "HAD_B"), "HAD_ASSOCIATED_TAKE_OVER_REASON" + ), + ExtractorSources[HospitalStay]( + HadHospitalStaysExtractor, + List("HAD_C", "HAD_B"), + "HAD_STAYS" ) ) } \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractorSuite.scala new file mode 100644 index 
00000000..f59a86a8 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractorSuite.scala @@ -0,0 +1,26 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import org.apache.spark.sql.DataFrame +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugConfig +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.Cip13Level +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +class DcirSourceExtractorSuite extends SharedContext { + "extract" should "extract available Events and warns when it fails if the tables have not been flattened" in { + val sqlCtx = sqlContext + + // Given + val dcir: DataFrame = sqlCtx.read.load("src/test/resources/test-input/DCIR.parquet") + val source = new Sources(dcir = Some(dcir)) + val path = "target/test/output" + val drugConfig = new DrugConfig(Cip13Level, List.empty) + val dcirSource = new DcirSourceExtractor(path, "overwrite", drugConfig) + + dcirSource.extract(source) + + assert(true) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractorSuite.scala new file mode 100644 index 00000000..5190b203 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractorSuite.scala @@ -0,0 +1,22 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +class HadSourceExtractorSuite extends SharedContext { + "extract" should "extract available Events and warns when it fails if the tables have not been flattened" in { + val sqlCtx = sqlContext + + // Given + val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") + val source = new Sources(had = Some(had)) + val path = 
"target/test/output" + val hadSource = new HadSourceExtractor(path, "overwrite") + // When + hadSource.extract(source) + // Then, make sure everything is running. + assert(true) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractorSuite.scala new file mode 100644 index 00000000..53195285 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractorSuite.scala @@ -0,0 +1,22 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +class ImbSourceExtractorSuite extends SharedContext { + "extract" should "extract available Events and warns when it fails if the tables have not been flattened" in { + val sqlCtx = sqlContext + + // Given + val imbR = spark.read.parquet("src/test/resources/test-input/IR_IMB_R.parquet") + val source = new Sources(had = Some(imbR)) // NOTE(review): IR_IMB_R data is passed as `had`; presumably the IMB field of Sources was intended - TODO confirm + val path = "target/test/output" + val imbSource = new ImbSourceExtractor(path, "overwrite") + // When + imbSource.extract(source) + // Then, make sure everything is running.
+ assert(true) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractorSuite.scala new file mode 100644 index 00000000..c415991a --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractorSuite.scala @@ -0,0 +1,22 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +class McoCeSourceExtractorSuite extends SharedContext { + "extract" should "extract available Events and warns when it fails if the tables have not been flattened" in { + val sqlCtx = sqlContext + + // Given + val mcoce = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") + val source = new Sources(mcoCe = Some(mcoce)) + val path = "target/test/output" + val mcoCeSource = new McoCeSourceExtractor(path, "overwrite") + // When + mcoCeSource.extract(source) + // Then, make sure everything is running. 
+ assert(true) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractorSuite.scala new file mode 100644 index 00000000..493716e8 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractorSuite.scala @@ -0,0 +1,22 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +class McoSourceExtractorSuite extends SharedContext { + "extract" should "extract available Events and warns when it fails if the tables have not been flattened" in { + val sqlCtx = sqlContext + + // Given + val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") + val source = new Sources(mco = Some(mco)) + val path = "target/test/output" + val mcoSource = new McoSourceExtractor(path, "overwrite") + // When + mcoSource.extract(source) + // Then, make sure everything is running. 
+ assert(true) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractorSuite.scala new file mode 100644 index 00000000..fba06d5d --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractorSuite.scala @@ -0,0 +1,86 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + + +import scala.reflect.runtime.universe +import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext} +import fr.polytechnique.cmap.cnam.etl.extractors.Extractor +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCIM10Act, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.util.functions.makeTS +import fr.polytechnique.cmap.cnam.util.reporting.{OperationMetadata, OperationTypes} +import fr.polytechnique.cmap.cnam.util.Path + +class SourceExtractorSuite extends SharedContext { + lazy val sqlCtx: SQLContext = super.sqlContext + + // This shouldn't be replicated anywhere and Mocking should be the preferred technique. + // Mocking the Extractor is not possible because of type erasure. Type erasure makes the typing of the implicit + // in the extract method of trait Extractor impossible, as the type is not known at compile time, but only at run time.
+ val testExtractor = new Extractor[MedicalAct] with Serializable { + override def isInStudy(codes: Set[String]) + (row: Row): Boolean = true + + override def isInExtractorScope(row: Row): Boolean = true + + override def builder(row: Row): Seq[Event[MedicalAct]] = + Seq(McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 31))) + + override def getInput(sources: Sources): DataFrame = { + import sqlCtx.implicits._ + Seq[Event[MedicalAct]]( + McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 31)), + McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 31)), + McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 10)) + ).toDF + } + + override def extract( + sources: Sources, + codes: Set[String]) + (implicit ctag: universe.TypeTag[MedicalAct]): Dataset[Event[MedicalAct]] = { + import sqlCtx.implicits._ + Seq[Event[MedicalAct]]( + McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 31)), + McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 31)), + McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 10)) + ).toDS + } + } + + "extract" should "produce run and report the Extractors" in { + import sqlCtx.implicits._ + + // Given + + val sources = Sources() + val ds = Seq[Event[MedicalAct]]( + McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 31)), + McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 31)), + McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 10)) + ).toDS + val path = "target/test/output" + + val expected = List( + OperationMetadata( + "Mock", + List("Mock"), + OperationTypes.AnyEvents, + Path(path, "Mock", "data").toString, + Path(path, "Mock", "patients").toString + ) + ) + // When + val se: SourceExtractor = new SourceExtractor(path, "overwrite") { + override val sourceName: String = "Test" + override val 
extractors: List[ExtractorSources[MedicalAct]] = + List(ExtractorSources[MedicalAct](testExtractor, List("Mock"), "Mock")) + } + + val result = se.extract(sources) + // Then + assert(result == expected) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractorSuite.scala new file mode 100644 index 00000000..0cf52a56 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractorSuite.scala @@ -0,0 +1,33 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.extractors.acts.SsrCeActExtractor +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class SsrCeSourceExtractorSuite extends SharedContext { + "extract" should "extract available Events and warns when it fails if the tables have not been flattened" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + import SsrCeActExtractor.ColNames + // Given + val ssrCe = Seq( + ("Patient_A", "AAAA", makeTS(2010, 1, 1)), + ("Patient_A", "BBBB", makeTS(2010, 2, 1)), + ("Patient_B", "CCCC", makeTS(2010, 3, 1)), + ("Patient_B", "CCCC", makeTS(2010, 4, 1)), + ("Patient_C", "BBBB", makeTS(2010, 5, 1)) + ).toDF( + ColNames.PatientID, ColNames.CamCode, ColNames.Date + ) + val source = new Sources(ssrCe = Some(ssrCe)) + val path = "target/test/output" + val ssrCeSource = new SsrCeSourceExtractor(path, "overwrite") + // When + ssrCeSource.extract(source) + // Then, make sure everything is running. 
+ assert(true) + } +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractorSuite.scala new file mode 100644 index 00000000..cb812e33 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractorSuite.scala @@ -0,0 +1,22 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.bulk.extractors + +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +class SsrSourceExtractorSuite extends SharedContext { + "extract" should "extract available Events and warns when it fails if the tables have not been flattened" in { + val sqlCtx = sqlContext + + // Given + val ssr = spark.read.parquet("src/test/resources/test-input/SSR.parquet") + val source = new Sources(ssr = Some(ssr)) + val path = "target/test/output" + val ssrSource = new SsrSourceExtractor(path, "overwrite") + // When + ssrSource.extract(source) + // Then, make sure everything is running. + assert(true) + } +} From be394ec97c8192fefa29c8982429e57ffb409337 Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Tue, 31 Mar 2020 15:09:52 +0200 Subject: [PATCH 26/38] CNAM-452: First working version. 
--- .../cmap/cnam/etl/events/Diagnosis.scala | 4 +- .../cmap/cnam/etl/events/MedicalAct.scala | 2 +- .../cnam/etl/extractors/BasicExtractor.scala | 49 + .../etl/extractors/EventRowExtractor.scala | 1 + .../cmap/cnam/etl/extractors/Extractor.scala | 70 +- .../cnam/etl/extractors/ExtractorCodes.scala | 26 + .../acts/DcirMedicalActExtractor.scala | 114 +- .../etl/extractors/acts/HadActExtractor.scala | 12 - .../extractors/acts/HadCcamActExtractor.scala | 14 + ...ractor.scala => McoCcamActExtractor.scala} | 26 +- .../extractors/acts/McoCeActExtractor.scala | 19 - .../acts/McoCeCcamActExtractor.scala | 16 + .../extractors/acts/MedicalActsConfig.scala | 5 +- .../etl/extractors/acts/SsrActExtractor.scala | 34 +- .../extractors/acts/SsrCeActExtractor.scala | 47 +- .../classifications/GhmExtractor.scala | 12 +- .../extractors/dcir/DcirBasicExtractor.scala | 13 + ...Extractor.scala => DcirRowExtractor.scala} | 48 +- .../cnam/etl/extractors/dcir/DcirSource.scala | 22 +- .../diagnoses/HadDiagnosisExtractor.scala | 19 +- .../diagnoses/ImbCimDiagnosisExtractor.scala | 32 + .../diagnoses/ImbDiagnosisExtractor.scala | 87 - .../diagnoses/McoDiagnosisExtractor.scala | 29 +- .../diagnoses/SsrDiagnosisExtractor.scala | 26 +- .../etl/extractors/drugs/DrugConfig.scala | 6 +- .../etl/extractors/drugs/DrugExtractor.scala | 77 +- .../classification/families/Cardiac.scala | 18 +- .../classification/families/Opioids.scala | 936 ++++---- .../families/ProtonPumpInhibitors.scala | 1906 ++++++++--------- .../extractors/had/HadBasicExtractor.scala | 15 + .../etl/extractors/had/HadExtractor.scala | 47 - .../etl/extractors/had/HadRowExtractor.scala | 25 + .../cnam/etl/extractors/had/HadSource.scala | 19 +- .../HadHospitalStaysExtractor.scala | 29 +- .../McoHospitalStaysExtractor.scala | 29 +- .../McoceEmergenciesExtractor.scala | 78 +- .../SsrHospitalStaysExtractor.scala | 18 +- .../extractors/imb/ImbBasicExtractor.scala | 15 + .../etl/extractors/imb/ImbRowExtractor.scala | 60 + 
.../cnam/etl/extractors/imb/ImbSource.scala | 15 + .../extractors/mco/McoBasicExtractor.scala | 14 + .../etl/extractors/mco/McoExtractor.scala | 77 - .../etl/extractors/mco/McoRowExtractor.scala | 42 + .../cnam/etl/extractors/mco/McoSource.scala | 7 +- .../mcoCe/McoCeBasicExtractor.scala | 11 + .../etl/extractors/mcoCe/McoCeExtractor.scala | 51 - .../extractors/mcoCe/McoCeRowExtractor.scala | 34 + .../etl/extractors/mcoCe/McoCeSource.scala | 25 +- .../molecules/DcirMoleculePurchases.scala | 37 +- .../molecules/MoleculePurchases.scala | 2 +- .../molecules/MoleculePurchasesConfig.scala | 6 +- .../ngapacts/DcirNgapActExtractor.scala | 142 +- .../ngapacts/McoCeNgapActExtractor.scala | 119 +- .../ngapacts/NgapActClassConfig.scala | 26 +- .../extractors/ngapacts/NgapActConfig.scala | 14 +- .../extractors/patients/DcirPatients.scala | 2 +- .../etl/extractors/patients/HadPatients.scala | 4 +- .../extractors/patients/IrBenPatients.scala | 2 +- .../etl/extractors/patients/McoPatients.scala | 2 +- .../extractors/patients/McocePatients.scala | 2 +- .../etl/extractors/patients/Patients.scala | 2 +- .../prestations/McoCeSpecialtyExtractor.scala | 36 + ...PractitionerClaimSpecialityExtractor.scala | 97 +- .../extractors/ssr/SsrBasicExtractor.scala | 13 + .../etl/extractors/ssr/SsrExtractor.scala | 47 - .../etl/extractors/ssr/SsrRowExtractor.scala | 26 + .../cnam/etl/extractors/ssr/SsrSource.scala | 35 +- .../ssrce/SsrCeBasicExtractor.scala | 14 + .../extractors/ssrce/SsrCeRowExtractor.scala | 15 + .../etl/extractors/ssrce/SsrCeSource.scala | 18 + .../HadTakeOverReasonExtractor.scala | 29 +- .../cmap/cnam/study/bulk/BulkMain.scala | 9 +- .../cmap/cnam/study/fall/FallMain.scala | 2 +- .../cnam/study/fall/FallMainExtract.scala | 2 +- .../study/fall/extractors/ActsExtractor.scala | 7 +- .../fall/extractors/CardiacExtractor.scala | 2 +- .../study/fall/extractors/ControlDrugs.scala | 2 +- .../fall/extractors/DiagnosisExtractor.scala | 7 +- .../fall/extractors/DrugsExtractor.scala | 5 
+- .../fall/extractors/EpilepticsExtractor.scala | 9 +- .../study/fall/extractors/HTAExtractor.scala | 2 +- .../study/fall/extractors/IPPExtractor.scala | 4 +- .../extractors/McoDiagnosisExtractor.scala | 34 +- .../fall/extractors/OpioidsExtractor.scala | 6 +- .../fractures/PublicAmbulatoryFractures.scala | 2 +- .../cnam/study/fall/fractures/Surgery.scala | 2 +- .../study/pioglitazone/PioglitazoneMain.scala | 2 +- .../pioglitazone/extractors/Diagnoses.scala | 7 +- .../pioglitazone/extractors/MedicalActs.scala | 9 +- .../rosiglitazone/RosiglitazoneMain.scala | 2 +- .../rosiglitazone/extractors/Diagnoses.scala | 7 +- .../PrescriptionExtractorSuite.scala | 9 +- .../acts/DcirBiologyActsSuite.scala | 223 +- .../acts/DcirMedicalActsSuite.scala | 279 +-- .../extractors/acts/HadMedicalActsSuite.scala | 13 +- .../acts/McoCEMedicalActsSuite.scala | 33 +- .../extractors/acts/McoMedicalActsSuite.scala | 51 +- .../acts/SsrCEMedicalActsSuite.scala | 34 +- .../extractors/acts/SsrMedicalActsSuite.scala | 13 +- .../GHMClassificationsSuite.scala | 7 +- .../diagnoses/HadDiagnosesSuite.scala | 13 +- .../diagnoses/ImbDiagnosesSuite.scala | 25 +- .../diagnoses/McoDiagnosesSuite.scala | 16 +- .../diagnoses/SsrDiagnosesSuite.scala | 25 +- .../drugs/DrugsExtractorSuite.scala | 18 +- .../etl/extractors/had/HadSourceSuite.scala | 4 +- .../HadHospitalStayExtractorSuite.scala | 6 +- .../McoHospitalStayExtractorSuite.scala | 18 +- .../McoceEmergenciesExtractorSuite.scala | 36 +- .../SSrHospitalStayExtractorSuite.scala | 6 +- .../etl/extractors/mco/McoSourceSuite.scala | 4 +- .../DcirMoleculePurchasesSuite.scala | 4 +- .../molecules/MoleculePurchasesSuite.scala | 6 +- .../ngapacts/DcirNgapActsExtractorSuite.scala | 47 +- .../ngapacts/McoNgapActsExtractorSuite.scala | 30 +- .../patients/HadPatientsSuite.scala | 5 +- .../patients/McocePatientsSuite.scala | 2 +- .../extractors/patients/PatientsSuite.scala | 4 +- ...itionerClaimSpecialityExtractorSuite.scala | 25 +- 
.../etl/extractors/ssr/SsrSourceSuite.scala | 33 +- .../HadTakeOveReasonSuite.scala | 13 +- .../fractures/FracturesTransformerSuite.scala | 6 +- .../PublicAmbulatoryFracturesSuite.scala | 10 +- .../outcomes/NaiveBladderCancerSuite.scala | 2 +- 124 files changed, 2916 insertions(+), 3197 deletions(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/BasicExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ExtractorCodes.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadActExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadCcamActExtractor.scala rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/{McoActExtractor.scala => McoCcamActExtractor.scala} (50%) delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeCcamActExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirBasicExtractor.scala rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/{DcirExtractor.scala => DcirRowExtractor.scala} (50%) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbCimDiagnosisExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosisExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadBasicExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadRowExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbBasicExtractor.scala create mode 100644 
src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbRowExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbSource.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoBasicExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoRowExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeBasicExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeRowExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/McoCeSpecialtyExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrBasicExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrRowExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeBasicExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeRowExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeSource.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala index f21f34c0..5fefa552 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala @@ -63,6 +63,6 @@ object SsrTakingOverPurpose extends Diagnosis { val category: EventCategory[Diagnosis] = 
"ssr_taking_over_purpose" } -object ImbDiagnosis extends Diagnosis { - override val category: EventCategory[Diagnosis] = "imb_diagnosis" +object ImbCcamDiagnosis extends Diagnosis { + override val category: EventCategory[Diagnosis] = "imb_ccam_diagnosis" } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala index 03d3f736..76a04463 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala @@ -40,7 +40,7 @@ object McoCIM10Act extends MedicalAct { val category: EventCategory[MedicalAct] = "mco_cim10_act" } -object McoCEAct extends MedicalAct { +object McoCeCcamAct extends MedicalAct { val category: EventCategory[MedicalAct] = "mco_ce_act" } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/BasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/BasicExtractor.scala new file mode 100644 index 00000000..393b15b1 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/BasicExtractor.scala @@ -0,0 +1,49 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors + +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} + +trait BasicExtractor[EventType <: AnyEvent] extends Extractor[EventType, BaseExtractorCodes] { + self: EventRowExtractor => + + def columnName: String + def eventBuilder: EventBuilder + def neededColumns: List[String] = columnName :: self.usedColumns + + def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) + + def builder(row: Row): Seq[Event[EventType]] = { + lazy val patientId = extractPatientId(row) + lazy val groupId = extractGroupId(row) + lazy val value = extractValue(row) + lazy val eventDate = extractStart(row) + lazy val endDate = 
extractEnd(row) + lazy val weight = extractWeight(row) + + Seq(eventBuilder[EventType](patientId, groupId, value, weight, eventDate, endDate)) + } + + def extractValue(row: Row): String = row.getAs[String](columnName) +} + +sealed trait InStudyStrategy[EventType <: AnyEvent] { + self: BasicExtractor[EventType]=> + override def isInStudy(row: Row): Boolean +} + +trait AlwaysTrueStrategy[EventType <: AnyEvent] extends InStudyStrategy[EventType] { + self: BasicExtractor[EventType] => + def isInStudy(row: Row): Boolean = true +} + +trait IsInStrategy[EventType <: AnyEvent] extends InStudyStrategy[EventType] { + self: BasicExtractor[EventType] => + def isInStudy(row: Row): Boolean = getCodes.contains(extractValue(row)) +} + +trait StartsWithStrategy[EventType <: AnyEvent] extends InStudyStrategy[EventType] { + self: BasicExtractor[EventType] => + def isInStudy(row: Row): Boolean = getCodes.exists(extractValue(row).startsWith) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala index e93814dd..0c8c7ebc 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala @@ -7,6 +7,7 @@ import org.apache.spark.sql.Row trait EventRowExtractor { self: ColumnNames => + def usedColumns: List[String] = List.empty def extractPatientId(r: Row): String diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala index 2be85eac..427e7a61 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala @@ -2,66 +2,66 @@ package fr.polytechnique.cmap.cnam.etl.extractors -import scala.reflect.runtime.universe._ +import 
scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.{DataFrame, Dataset, Row} import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event} import fr.polytechnique.cmap.cnam.etl.sources.Sources -trait Extractor[EventType <: AnyEvent] extends Serializable { +trait Extractor[EventType <: AnyEvent, +Codes <: ExtractorCodes] extends Serializable { - /** Allows to check if the Row from the Source is considered in the current Study. - * - * @param codes A set of codes being considered in the Study. - * @param row The row itself. - * @return A boolean value. - */ - def isInStudy(codes: Set[String])(row: Row): Boolean + def getCodes: Codes + + /** Allows to check if the Row from the Source is considered in the current Study. + * + * @param row The row itself. + * @return A boolean value. + */ + def isInStudy(row: Row): Boolean /** Checks if the passed Row has the information needed to build the Event. - * - * @param row The row itself. - * @return A boolean value. - */ + * + * @param row The row itself. + * @return A boolean value. + */ def isInExtractorScope(row: Row): Boolean /** Builds the Event. - * - * @param row The row itself. - * @return An event object. - */ + * + * @param row The row itself. + * @return An event object. + */ def builder(row: Row): Seq[Event[EventType]] /** Gets and prepares all the needed columns from the Source. - * - * @param sources Source object [[Sources]] that contains all sources. - * @return A dataframe with mco columns. - */ + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A dataframe with mco columns. + */ def getInput(sources: Sources): DataFrame /** Extracts the Event from the Source. - * - * This function is responsible for gluing different other parts of the Extractor. - * This method should be considered the unique callable method from a Study perspective. - * - * @param sources Source object [[Sources]] that contains all sources. 
- * @param codes A set of codes passed through the method. - * @param ctag An implicit parameter taken from Eventype type. - * @return A dataset of Events. - */ - def extract(sources: Sources, codes: Set[String])(implicit ctag: TypeTag[EventType]): Dataset[Event[EventType]] = { + * + * This function is responsible for gluing different other parts of the Extractor. + * This method should be considered the unique callable method from a Study perspective. + * + * @param sources Source object [[Sources]] that contains all sources. + * @param ctag An implicit parameter taken from Eventype type. + * @return A dataset of Events. + */ + def extract(sources: Sources)(implicit ctag: TypeTag[EventType]): Dataset[Event[EventType]] = { val input: DataFrame = getInput(sources) import input.sqlContext.implicits._ { - if (codes.isEmpty) { + if (getCodes.isEmpty) { input.filter(isInExtractorScope _) } else { - input.filter(isInExtractorScope _).filter(isInStudy(codes) _) + input.filter(isInExtractorScope _).filter(isInStudy _) } - }.flatMap(builder _).distinct() + }.flatMap(builder).distinct() } -} \ No newline at end of file +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ExtractorCodes.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ExtractorCodes.scala new file mode 100644 index 00000000..42c59ee3 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ExtractorCodes.scala @@ -0,0 +1,26 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors + +import scala.collection.immutable.HashSet + +trait ExtractorCodes extends Serializable { + def isEmpty: Boolean +} + +class BaseExtractorCodes(val codes: List[String]) extends ExtractorCodes { + val internalCodes: HashSet[String] = codes.to[HashSet] + + override def isEmpty: Boolean = internalCodes.isEmpty + + def exists(p: String => Boolean): Boolean = internalCodes.exists(p) + + def contains(code: String): Boolean = internalCodes.contains(code) +} + 
+object BaseExtractorCodes { + def empty = new BaseExtractorCodes(List.empty) + + def apply(codes: List[String]): BaseExtractorCodes = new BaseExtractorCodes(codes) + +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala index 1edd9786..31d6b550 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala @@ -4,85 +4,79 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts import java.sql.Timestamp import scala.util.Try -import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{BiologyDcirAct, DcirAct, EventBuilder, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.functions +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirBasicExtractor +import fr.polytechnique.cmap.cnam.util.functions.makeTS -trait DcirActExtractor extends DcirExtractor[MedicalAct] { +abstract sealed class DcirRowActExtractor(codes: BaseExtractorCodes) extends DcirBasicExtractor[MedicalAct] + with StartsWithStrategy[MedicalAct] { - private final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) + final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) - override def extractGroupId(r: Row): String = { - getGroupId(r) recover { case _: IllegalArgumentException => DcirAct.groupID.DcirAct } + override def usedColumns: List[String] = List(ColNames.InstitutionCode, ColNames.GHSCode, ColNames.Sector) ++ super + .usedColumns + + override def getCodes: BaseExtractorCodes = codes + + override def extractStart(r: Row): Timestamp = 
{ + Try(super.extractStart(r)) recover { + case _ => makeTS(1970, 1, 1) + } }.get /** - * Get the information of the origin of DCIR act that is being extracted. It returns a - * Failure[IllegalArgumentException] if the DCIR schema is old, a success if the DCIR schema contains an information. - * - * @param r the row of DCIR to be investigated. - * @return Try[String] - */ - def getGroupId(r: Row): Try[String] = Try { - - if (!r.isNullAt(r.fieldIndex(ColNames.Sector)) && getSector(r) == 1) { - DcirAct.groupID.PublicAmbulatory - } - else { - if (r.isNullAt(r.fieldIndex(ColNames.GHSCode))) { - DcirAct.groupID.Liberal - } else { - // Value is not at null, it is not liberal - lazy val ghs = getGHS(r) - lazy val institutionCode = getInstitutionCode(r) - // Check if it is a private ambulatory - if (ghs == 0 && PrivateInstitutionCodes.contains(institutionCode)) { - DcirAct.groupID.PrivateAmbulatory - } - else { - DcirAct.groupID.Unknown + * Get the information of the origin of DCIR act that is being extracted. It returns a + * Failure[IllegalArgumentException] if the DCIR schema is old, a success if the DCIR schema contains an information. + * + * @param r the row of DCIR to be investigated. 
+ * @return Try[String] + */ + // TODO: REMOVE THIS + override def extractGroupId(r: Row): String = { + Try { + + if (!r.isNullAt(r.fieldIndex(ColNames.Sector)) && getSector(r) == 1) { + DcirAct.groupID.PublicAmbulatory + } + else { + if (r.isNullAt(r.fieldIndex(ColNames.GHSCode))) { + DcirAct.groupID.Liberal + } else { + // Value is not at null, it is not liberal + lazy val ghs = getGHS(r) + lazy val institutionCode = getInstitutionCode(r) + // Check if it is a private ambulatory + if (ghs == 0 && PrivateInstitutionCodes.contains(institutionCode)) { + DcirAct.groupID.PrivateAmbulatory + } + else { + DcirAct.groupID.Unknown + } } } - } - } - - def getGHS(r: Row): Double = r.getAs[Double](ColNames.GHSCode) - - def getInstitutionCode(r: Row): Double = r.getAs[Double](ColNames.InstitutionCode) + } recover { case _: IllegalArgumentException => DcirAct.groupID.DcirAct } + }.get - def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) + private def getGHS(r: Row): Double = r.getAs[Double](ColNames.GHSCode) - override def extractWeight(r: Row): Double = 1.0 + private def getInstitutionCode(r: Row): Double = r.getAs[Double](ColNames.InstitutionCode) - override def extractStart(r: Row): Timestamp = { - Try(super.extractStart(r)) recover { - case _ => functions.makeTS(1970, 1, 1) - } - }.get + private def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) } - -object DcirMedicalActExtractor extends DcirActExtractor { +final case class DcirMedicalActExtractor(codes: BaseExtractorCodes) + extends DcirRowActExtractor(codes) { override val columnName: String = ColNames.CamCode override val eventBuilder: EventBuilder = DcirAct - - override def getInput(sources: Sources): DataFrame = sources.dcir.get.select( - ColNames.PatientID, ColNames.CamCode, ColNames.Date, - ColNames.InstitutionCode, ColNames.GHSCode, ColNames.Sector - ) } - -object DcirBiologyActExtractor extends DcirActExtractor { +final case class DcirBiologyActExtractor(codes: BaseExtractorCodes) + 
extends DcirRowActExtractor(codes) { override val columnName: String = ColNames.BioCode override val eventBuilder: EventBuilder = BiologyDcirAct - override def code = (row: Row) => row.getAs[Double](columnName).toString + override def extractValue(row: Row): String = row.getAs[Double](columnName).toString - override def getInput(sources: Sources): DataFrame = sources.dcir.get.select( - ColNames.PatientID, ColNames.BioCode, ColNames.Date, - ColNames.InstitutionCode, ColNames.GHSCode, ColNames.Sector - ) -} \ No newline at end of file +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadActExtractor.scala deleted file mode 100644 index ed83b17b..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadActExtractor.scala +++ /dev/null @@ -1,12 +0,0 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.acts - -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.had.HadExtractor -import org.apache.spark.sql.Row - -object HadCcamActExtractor extends HadExtractor[MedicalAct] { - final override val columnName: String = ColNames.CCAM - override val eventBuilder: EventBuilder = HadCCAMAct -} - - diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadCcamActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadCcamActExtractor.scala new file mode 100644 index 00000000..3aeb2953 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadCcamActExtractor.scala @@ -0,0 +1,14 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.acts + +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HadCCAMAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.had.HadBasicExtractor + +final case class HadCcamActExtractor(codes: 
BaseExtractorCodes) extends HadBasicExtractor[MedicalAct] + with StartsWithStrategy[MedicalAct] { + override val columnName: String = ColNames.CCAM + override val eventBuilder: EventBuilder = HadCCAMAct + override def getCodes: BaseExtractorCodes = codes +} + + diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCcamActExtractor.scala similarity index 50% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoActExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCcamActExtractor.scala index b0ada5bc..46ab225d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCcamActExtractor.scala @@ -6,10 +6,20 @@ import java.sql.Timestamp import me.danielpes.spark.datetime.Period import me.danielpes.spark.datetime.implicits.DateImplicits import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoExtractor +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCCAMAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoBasicExtractor + + +final case class McoCcamActExtractor(codes: BaseExtractorCodes) extends McoBasicExtractor[MedicalAct] + with StartsWithStrategy[MedicalAct] { + override val columnName: String = ColNames.CCAM + override val eventBuilder: EventBuilder = McoCCAMAct + + override def usedColumns: List[String] = ColNames.CCAMDelayDate :: super.usedColumns + + override def getCodes: BaseExtractorCodes = codes -trait McoActExtractor extends McoExtractor[MedicalAct] { override def extractStart(r: Row): Timestamp = { (r.getAs[Timestamp](NewColumns.EstimatedStayStart) + Period(days = 
getDateOffset(r))).get } @@ -19,13 +29,3 @@ trait McoActExtractor extends McoExtractor[MedicalAct] { case value: String => value.toInt } } - -object McoCcamActExtractor extends McoActExtractor { - final override val columnName: String = ColNames.CCAM - override val eventBuilder: EventBuilder = McoCCAMAct -} - -object McoCimMedicalActExtractor extends McoActExtractor { - final override val columnName: String = ColNames.DP - override val eventBuilder: EventBuilder = McoCIM10Act -} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala deleted file mode 100644 index 553c9458..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeActExtractor.scala +++ /dev/null @@ -1,19 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.acts - - -import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.functions.col -import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCEAct, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources - -object McoCeActExtractor extends McoCeExtractor[MedicalAct] { - val columnName: String = ColNames.CamCode - override val eventBuilder: EventBuilder = McoCEAct - - override def getInput(sources: Sources): DataFrame = { - sources.mcoCe.get.select((ColNames.CamCode :: ColNames.core).map(col): _*) - } -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeCcamActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeCcamActExtractor.scala new file mode 100644 index 00000000..4c09da9c --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeCcamActExtractor.scala @@ -0,0 +1,16 @@ +// License: BSD 3 clause + +package 
fr.polytechnique.cmap.cnam.etl.extractors.acts + + +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCeCcamAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeBasicExtractor + +final case class McoCeCcamActExtractor(codes: BaseExtractorCodes) extends McoCeBasicExtractor[MedicalAct] + with StartsWithStrategy[MedicalAct] { + override val eventBuilder: EventBuilder = McoCeCcamAct + override val columnName: String = ColNames.CamCode + + override def getCodes: BaseExtractorCodes = codes +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfig.scala index 5c1d6758..d3bf4312 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfig.scala @@ -19,7 +19,7 @@ class MedicalActsConfig( val ssrCECodes: List[String], val ssrCSARRCodes: List[String], val hadCCAMCodes: List[String] - ) extends ExtractorConfig +) extends ExtractorConfig object MedicalActsConfig { @@ -41,6 +41,7 @@ object MedicalActsConfig { ssrCSARRCodes, ssrCCAMCodes, ssrCECodes, - hadCCAMCodes) + hadCCAMCodes + ) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrActExtractor.scala index 1db7434f..cea13765 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrActExtractor.scala @@ -1,27 +1,33 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrExtractor -import org.apache.spark.sql.Row +import 
fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, MedicalAct, SsrCCAMAct, SsrCSARRAct} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrBasicExtractor -object SsrCcamActExtractor extends SsrExtractor[MedicalAct] { - final override val columnName: String = ColNames.CCAM +final case class SsrCcamActExtractor(codes: BaseExtractorCodes) extends SsrBasicExtractor[MedicalAct] with + StartsWithStrategy[MedicalAct] { + override val columnName: String = ColNames.CCAM override val eventBuilder: EventBuilder = SsrCCAMAct + + override def getCodes: BaseExtractorCodes = codes } /** Extract Csarr codes : * - * The Specific Catalogue of Acts of Rehabilitation and Rehabilitation (CSARR) is intended to - * describe and code the activity of the professionals concerned in follow-up care and - * rehabilitation establishments (SSR). These acts are to be distinguished from CCAM acts which - * are the sole responsibility of the doctor. + * The Specific Catalogue of Acts of Rehabilitation and Rehabilitation (CSARR) is intended to + * describe and code the activity of the professionals concerned in follow-up care and + * rehabilitation establishments (SSR). These acts are to be distinguished from CCAM acts which + * are the sole responsibility of the doctor. * - * This terminology is of the form `AAA+111`, eg. *GKQ+139 : Évaluation initiale du langage écrit* + * This terminology is of the form `AAA+111`, eg. 
*GKQ+139 : Évaluation initiale du langage écrit* * - * The complete terminology can be found here : https://drees.shinyapps.io/dico-snds/?variable=FP_PEC&search=csar&table=T_SSRaa_nnB - * For more details see : https://www.atih.sante.fr/sites/default/files/public/content/3302/csarr_2018.pdf + * The complete terminology can be found here : https://drees.shinyapps.io/dico-snds/?variable=FP_PEC&search=csar&table=T_SSRaa_nnB + * For more details see : https://www.atih.sante.fr/sites/default/files/public/content/3302/csarr_2018.pdf */ -object SsrCsarrActExtractor extends SsrExtractor[MedicalAct] { - final override val columnName: String = ColNames.CSARR +final case class SsrCsarrActExtractor(codes: BaseExtractorCodes) extends SsrBasicExtractor[MedicalAct] with + StartsWithStrategy[MedicalAct] { + override val columnName: String = ColNames.CSARR override val eventBuilder: EventBuilder = SsrCSARRAct + + override def getCodes: BaseExtractorCodes = codes } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala index 6a691b68..4bd2eea2 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala @@ -1,45 +1,14 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts -import java.sql.{Date, Timestamp} -import org.apache.spark.sql.{DataFrame, Row, functions} -import fr.polytechnique.cmap.cnam.etl.events.{Event, SsrCEAct, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.Extractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.datetime.implicits._ +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, MedicalAct, SsrCEAct} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import 
fr.polytechnique.cmap.cnam.etl.extractors.ssrce.SsrCeBasicExtractor -object SsrCeActExtractor extends Extractor[MedicalAct] with SsrCeSourceExtractor { - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(getCode(row).startsWith(_)) +final case class SsrCeActExtractor(codes: BaseExtractorCodes) extends SsrCeBasicExtractor[MedicalAct] + with StartsWithStrategy[MedicalAct] { + override def columnName: String = ColNames.CamCode - override def isInExtractorScope(row: Row): Boolean = !isNullAt(ColNames.CamCode)(row) - - override def builder(row: Row): Seq[Event[MedicalAct]] = { - lazy val patientID = getPatientID(row) - lazy val date = getDate(row) - lazy val code = getCode(row) - // todo + tard, on peut recuperer le ETA_NUM + numero du pfs de santé à la place de ACE pr groupID - Seq(SsrCEAct(patientID, "ACE", code, date)) - } - - override def getInput(sources: Sources): DataFrame = - sources.ssrCe.get.select(ColNames.all.map(functions.col): _*) -} - -trait SsrCeSourceExtractor { - - def getPatientID(row: Row): String = row.getAs[String](ColNames.PatientID) - - def getDate(row: Row): Timestamp = row.getAs[Date](ColNames.Date).toTimestamp - - def getCode(row: Row): String = row.getAs[String](ColNames.CamCode) - - def isNullAt(colName: String)(row: Row): Boolean = row.isNullAt(row.fieldIndex(colName)) - - final object ColNames extends Serializable { - final lazy val PatientID = "NUM_ENQ" - final lazy val CamCode = "SSR_FMSTC__CCAM_COD" - final lazy val Date = "EXE_SOI_DTD" - final lazy val all = List(PatientID, CamCode, Date) - } + override def eventBuilder: EventBuilder = SsrCEAct + override def getCodes: BaseExtractorCodes = codes } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GhmExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GhmExtractor.scala index ac730ae8..cd802950 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GhmExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GhmExtractor.scala @@ -3,9 +3,13 @@ package fr.polytechnique.cmap.cnam.etl.extractors.classifications import fr.polytechnique.cmap.cnam.etl.events.{Classification, EventBuilder, GHMClassification} -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoBasicExtractor -object GhmExtractor extends McoExtractor[Classification] { - final override val columnName: String = ColNames.GHM +final case class GhmExtractor(codes: BaseExtractorCodes) extends McoBasicExtractor[Classification] + with StartsWithStrategy[Classification] { + override val columnName: String = ColNames.GHM override val eventBuilder: EventBuilder = GHMClassification -} + + override def getCodes: BaseExtractorCodes = codes +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirBasicExtractor.scala new file mode 100644 index 00000000..0bb81912 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirBasicExtractor.scala @@ -0,0 +1,13 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.dcir + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.events.AnyEvent +import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait DcirBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with DcirRowExtractor { + def getInput(sources: Sources): DataFrame = sources.dcir.get.select(neededColumns.map(col): _*) +} diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirRowExtractor.scala similarity index 50% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirRowExtractor.scala index 96580f4d..1f8d49ef 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirRowExtractor.scala @@ -5,50 +5,33 @@ package fr.polytechnique.cmap.cnam.etl.extractors.dcir import java.sql.Timestamp import scala.util.Try import org.apache.commons.codec.binary.Base64 -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} -import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} -import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} -import fr.polytechnique.cmap.cnam.etl.sources.Sources +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor import fr.polytechnique.cmap.cnam.util.datetime.implicits._ -trait DcirExtractor[EventType <: AnyEvent] extends Extractor[EventType] with DcirSource with EventRowExtractor { - val columnName: String +trait DcirRowExtractor extends DcirSource with EventRowExtractor { - val eventBuilder: EventBuilder - - def getInput(sources: Sources): DataFrame = sources.dcir.get.select(ColNames.all.map(col): _*) - - def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(code(row).startsWith(_)) - - def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) - - def builder(row: Row): Seq[Event[EventType]] = { - lazy val patientId = extractPatientId(row) - lazy val groupId = extractGroupId(row) - lazy val eventDate = extractStart(row) - lazy val endDate = extractEnd(row) - lazy val weight = 
extractWeight(row) - - Seq(eventBuilder[EventType](patientId, groupId, code(row), weight, eventDate, endDate)) - } - - def code = (row: Row) => row.getAs[String](columnName) + override def usedColumns: List[ColName] = List( + ColNames.PatientID, ColNames.DcirFluxDate, ColNames.DcirEventStart, + ColNames.FlowDistributionDate, ColNames.FlowTreatementDate, ColNames.FlowEmitterType, + ColNames.FlowEmitterId, ColNames.FlowEmitterNumber, + ColNames.OrgId, ColNames.OrderId, ColNames.DcirEventStart + ) ++ super.usedColumns def extractPatientId(r: Row): String = { r.getAs[String](ColNames.PatientID) } - /** Trying to catch unknown dates + /** Trying to catch unknown dates * example of unknown dates situation : IJ = Indemnité Journalière which are a replacement income * paid by the HealthCare Insurance during a sick leave. + * * @param r The Row object itself * @return The date of the event or the flux date if it doesn't exist */ def extractStart(r: Row): Timestamp = { - Try(r.getAs[java.util.Date](ColNames.Date).toTimestamp) recover { + Try(r.getAs[java.util.Date](ColNames.DcirEventStart).toTimestamp) recover { case _: NullPointerException => extractFluxDate(r) } }.get @@ -65,12 +48,12 @@ trait DcirExtractor[EventType <: AnyEvent] extends Extractor[EventType] with Dci */ override def extractGroupId(r: Row): String = { Base64.encodeBase64( - s"${r.getAs[String](ColNames.DateStart)}_${r.getAs[String](ColNames.DateEntry)}_${ + s"${r.getAs[String](ColNames.FlowDistributionDate)}_${r.getAs[String](ColNames.FlowTreatementDate)}_${ r.getAs[String]( ColNames - .EmitterType + .FlowEmitterType ) - }_${r.getAs[String](ColNames.EmitterId)}_${r.getAs[String](ColNames.FlowNumber)}_${ + }_${r.getAs[String](ColNames.FlowEmitterId)}_${r.getAs[String](ColNames.FlowEmitterNumber)}_${ r.getAs[String]( ColNames .OrgId @@ -80,3 +63,4 @@ trait DcirExtractor[EventType <: AnyEvent] extends Extractor[EventType] with Dci } } + diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala index 2a8aac6b..009b1295 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala @@ -17,16 +17,20 @@ trait DcirSource extends ColumnNames { lazy val GHSCode: String = "ER_ETE_F__ETE_GHS_NUM" lazy val InstitutionCode: String = "ER_ETE_F__ETE_TYP_COD" lazy val Sector: String = "ER_ETE_F__PRS_PPU_SEC" - lazy val Date: String = "EXE_SOI_DTD" lazy val NaturePrestation: ColName = "PRS_NAT_REF" lazy val NgapCoefficient: ColName = "PRS_ACT_CFT" + + lazy val core = ( + PatientID, DcirEventStart, DcirFluxDate, FlowDistributionDate, FlowTreatementDate, + FlowEmitterId, FlowEmitterNumber, OrderId, OrgId + ) lazy val all = List( PatientID, CamCode, GHSCode, InstitutionCode, Sector, - Date, + DcirEventStart, MSpe, NonMSpe, ExecPSNum, @@ -36,13 +40,13 @@ trait DcirSource extends ColumnNames { ) - lazy val DateStart: ColName = "FLX_DIS_DTD" - lazy val DateEntry: ColName = "FLX_TRT_DTD" - lazy val EmitterType: ColName = "FLX_EMT_TYP" - lazy val EmitterId: ColName = "FLX_EMT_NUM" - lazy val FlowNumber: ColName = "FLX_EMT_ORD" - lazy val OrgId: ColName = "ORG_CLE_NUM" - lazy val OrderId: ColName = "DCT_ORD_NUM" + lazy val FlowDistributionDate: ColName = "FLX_DIS_DTD" + lazy val FlowTreatementDate: ColName = "FLX_TRT_DTD" + lazy val FlowEmitterType: ColName = "FLX_EMT_TYP" + lazy val FlowEmitterId: ColName = "FLX_EMT_NUM" + lazy val FlowEmitterNumber: ColName = "FLX_EMT_ORD" + lazy val OrgId: ColName = "ORG_CLE_NUM" + lazy val OrderId: ColName = "DCT_ORD_NUM" } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosisExtractor.scala index 17fa996a..5a0abbed 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosisExtractor.scala @@ -1,14 +1,21 @@ package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.had.HadExtractor +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, HadAssociatedDiagnosis, HadMainDiagnosis} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.had.HadBasicExtractor -object HadMainDiagnosisExtractor extends HadExtractor[Diagnosis] { - final override val columnName: String = ColNames.DP +final case class HadMainDiagnosisExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[Diagnosis] with + StartsWithStrategy[Diagnosis] { + override val columnName: String = ColNames.DP override val eventBuilder: EventBuilder = HadMainDiagnosis + + override def getCodes: BaseExtractorCodes = codes } -object HadAssociatedDiagnosisExtractor extends HadExtractor[Diagnosis] { - final override val columnName: String = ColNames.DA +final case class HadAssociatedDiagnosisExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[Diagnosis] with + StartsWithStrategy[Diagnosis] { + override val columnName: String = ColNames.DA override val eventBuilder: EventBuilder = HadAssociatedDiagnosis + + override def getCodes: BaseExtractorCodes = codes } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbCimDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbCimDiagnosisExtractor.scala new file mode 100644 index 00000000..d4fdf9f4 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbCimDiagnosisExtractor.scala @@ -0,0 +1,32 @@ +// License: BSD 3 clause + 
+package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses + +import org.apache.spark.sql.{DataFrame, Row} +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, ImbCcamDiagnosis} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, IsInStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.imb.ImbBasicExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + + +final case class ImbCimDiagnosisExtractor(codes: BaseExtractorCodes) extends ImbBasicExtractor[Diagnosis] + with IsInStrategy[Diagnosis] { + + override def isInExtractorScope(row: Row): Boolean = { + lazy val idx = row.fieldIndex(ColNames.Code) + extractEncoding(row) == "CIM10" || !row.isNullAt(idx) + } + + override def isInStudy(row: Row): Boolean = codes.exists(extractValue(row).startsWith(_)) + + override def getInput(sources: Sources): DataFrame = sources.irImb.get + + override def columnName: String = ColNames.Code + + override def eventBuilder: EventBuilder = ImbCcamDiagnosis + + override def neededColumns: List[String] = + List(ColNames.PatientID, ColNames.Date, ColNames.Encoding, ColNames.Code, ColNames.EndDate) + + override def getCodes: BaseExtractorCodes = codes +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosisExtractor.scala deleted file mode 100644 index 8b853cb5..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosisExtractor.scala +++ /dev/null @@ -1,87 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses - -import scala.util.Try - -import java.sql.{Date, Timestamp} - -import org.apache.spark.sql.{DataFrame, Row} - -import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event, ImbDiagnosis} -import fr.polytechnique.cmap.cnam.etl.extractors.Extractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import 
fr.polytechnique.cmap.cnam.util.datetime -import fr.polytechnique.cmap.cnam.util.datetime.implicits._ -import fr.polytechnique.cmap.cnam.util.functions.makeTS - - -object ImbDiagnosisExtractor extends Extractor[Diagnosis] with ImbSource { - - override def isInExtractorScope(row: Row): Boolean = { - lazy val idx = row.fieldIndex(ColNames.Code) - getEncoding(row) == "CIM10" || !row.isNullAt(idx) - } - - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(getCode(row).startsWith(_)) - - override def builder(row: Row): Seq[Event[Diagnosis]] = - Seq(ImbDiagnosis(getPatientID(row), getCode(row), getEventDate(row), getEventEnd(row))) - - override def getInput(sources: Sources): DataFrame = sources.irImb.get -} - -/** IR_IMB_R contains the Chronic Diseases diagnoses (ALD = Affection Longue Duree) for patients once - * they have been exonerated for all cares related to this Chronic Disease. - * It is the medical service of the health insurance that grants this ALD on the proposal of the - * patient's main physician (Medecin Traitant). - * See the [online snds documentation for further details](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#le-dispositif-des-ald) - * - */ -trait ImbSource extends Serializable { - - lazy val getCode = (row: Row) => row.getAs[String](ColNames.Code) - - def getEncoding(row: Row): String = row.getAs[String](ColNames.Encoding) - - def getPatientID(row: Row): String = row.getAs[String](ColNames.PatientID) - - def getEventDate(row: Row): Timestamp = { - import datetime.implicits._ - - row.getAs[Date](ColNames.Date).toTimestamp - } - - /** - * The End date of the ALD is not always written. It can takes the value 1600-01-01 which - * corresponds to a None value (not set) that we convert to None. 
- * See the CNAM documentation [available here](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#annexe) - * - * @param r - * @return - */ - def getEventEnd(r: Row): Option[Timestamp] = { - Try({ - val rawEndDate = r.getAs[java.util.Date](ColNames.EndDate).toTimestamp - - if (makeTS(1700, 1 ,1).after(rawEndDate)){ - None - } - else { - Some(rawEndDate) - } - }) recover { - case _: NullPointerException => None - } - }.get - - final object ColNames extends Serializable { - final lazy val PatientID = "NUM_ENQ" - final lazy val Encoding = "MED_NCL_IDT" - final lazy val Code = "MED_MTF_COD" - final lazy val Date = "IMB_ALD_DTD" - final lazy val EndDate = "IMB_ALD_DTF" - } - -} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosisExtractor.scala index 88cf61df..8e0af3b8 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosisExtractor.scala @@ -1,25 +1,28 @@ package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoBasicExtractor -class McoMainDiagnosisExtractor extends McoExtractor[Diagnosis] { - final override val columnName: String = ColNames.DP +protected trait BasicMcoDiagnosisExtractor extends McoBasicExtractor[Diagnosis] with StartsWithStrategy[Diagnosis] + +case class McoMainDiagnosisExtractor(codes: BaseExtractorCodes) extends BasicMcoDiagnosisExtractor { + override val columnName: String = ColNames.DP override val eventBuilder: EventBuilder = McoMainDiagnosis -} -object 
McoMainDiagnosisExtractor extends McoMainDiagnosisExtractor + override def getCodes: BaseExtractorCodes = codes +} -class McoAssociatedDiagnosisExtractor extends McoExtractor[Diagnosis] { - final override val columnName: String = ColNames.DA +case class McoAssociatedDiagnosisExtractor(codes: BaseExtractorCodes) extends BasicMcoDiagnosisExtractor { + override val columnName: String = ColNames.DA override val eventBuilder: EventBuilder = McoAssociatedDiagnosis -} -object McoAssociatedDiagnosisExtractor extends McoAssociatedDiagnosisExtractor + override def getCodes: BaseExtractorCodes = codes +} -class McoLinkedDiagnosisExtractor extends McoExtractor[Diagnosis] { - final override val columnName: String = ColNames.DR +case class McoLinkedDiagnosisExtractor(codes: BaseExtractorCodes) extends BasicMcoDiagnosisExtractor { + override val columnName: String = ColNames.DR override val eventBuilder: EventBuilder = McoLinkedDiagnosis -} -object McoLinkedDiagnosisExtractor extends McoLinkedDiagnosisExtractor + override def getCodes: BaseExtractorCodes = codes +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosisExtractor.scala index 8438e494..e9890634 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosisExtractor.scala @@ -1,24 +1,30 @@ package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrExtractor +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, SsrAssociatedDiagnosis, SsrLinkedDiagnosis, SsrMainDiagnosis, SsrTakingOverPurpose} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} +import 
fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrBasicExtractor -object SsrMainDiagnosisExtractor extends SsrExtractor[Diagnosis] { - final override val columnName: String = ColNames.DP +protected sealed abstract class SsrDiagnosisExtractor(codes: BaseExtractorCodes) extends SsrBasicExtractor[Diagnosis] with + StartsWithStrategy[Diagnosis] { + override def getCodes: BaseExtractorCodes = codes +} + +final case class SsrMainDiagnosisExtractor(codes: BaseExtractorCodes) extends SsrDiagnosisExtractor(codes) { + override val columnName: String = ColNames.DP override val eventBuilder: EventBuilder = SsrMainDiagnosis } -object SsrAssociatedDiagnosisExtractor extends SsrExtractor[Diagnosis] { - final override val columnName: String = ColNames.DA +final case class SsrAssociatedDiagnosisExtractor(codes: BaseExtractorCodes) extends SsrDiagnosisExtractor(codes) { + override val columnName: String = ColNames.DA override val eventBuilder: EventBuilder = SsrAssociatedDiagnosis } -object SsrLinkedDiagnosisExtractor extends SsrExtractor[Diagnosis] { - final override val columnName: String = ColNames.DR +final case class SsrLinkedDiagnosisExtractor(codes: BaseExtractorCodes) extends SsrDiagnosisExtractor(codes) { + override val columnName: String = ColNames.DR override val eventBuilder: EventBuilder = SsrLinkedDiagnosis } -object SsrTakingOverPurposeExtractor extends SsrExtractor[Diagnosis] { - final override val columnName: String = ColNames.FP_PEC +final case class SsrTakingOverPurposeExtractor(codes: BaseExtractorCodes) extends SsrDiagnosisExtractor(codes) { + override val columnName: String = ColNames.FP_PEC override val eventBuilder: EventBuilder = SsrTakingOverPurpose } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugConfig.scala index 0db3c570..515155a2 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugConfig.scala @@ -2,13 +2,15 @@ package fr.polytechnique.cmap.cnam.etl.extractors.drugs -import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig +import fr.polytechnique.cmap.cnam.etl.extractors.{ExtractorCodes, ExtractorConfig} import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.DrugClassificationLevel class DrugConfig( val level: DrugClassificationLevel, - val families: List[DrugClassConfig]) extends ExtractorConfig with Serializable + val families: List[DrugClassConfig]) extends ExtractorConfig with ExtractorCodes { + override def isEmpty: Boolean = families.isEmpty +} object DrugConfig { def apply(level: DrugClassificationLevel, families: List[DrugClassConfig]): DrugConfig = new DrugConfig( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala index a79e5b03..39db7ffb 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala @@ -3,39 +3,41 @@ package fr.polytechnique.cmap.cnam.etl.extractors.drugs import java.sql.Timestamp -import scala.reflect.runtime.universe import org.apache.commons.codec.binary.Base64 -import org.apache.spark.sql._ +import org.apache.spark.sql.{Column, DataFrame, Row} import org.apache.spark.sql.functions.{col, when} import org.apache.spark.sql.types.{StringType, TimestampType} import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { +class 
DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug, DrugConfig] { - override def extract( - sources: Sources, - codes: Set[String]) - (implicit ctag: universe.TypeTag[Drug]): Dataset[Event[Drug]] = { + override def getCodes: DrugConfig = drugConfig - val input: DataFrame = getInput(sources) + override def isInStudy(row: Row): Boolean = drugConfig.level.isInFamily(drugConfig.families, row) - import input.sqlContext.implicits._ + override def isInExtractorScope(row: Row): Boolean = true + + override def builder(row: Row): Seq[Event[Drug]] = { + lazy val classification = drugConfig.level.getClassification(drugConfig.families)(row) - { - if (drugConfig.families.isEmpty) { - input.filter(isInExtractorScope _) - } - else { - input.filter(isInExtractorScope _).filter(isInStudy(codes) _) - } - }.flatMap(builder _).distinct() + lazy val patientID = getPatientID(row) + lazy val conditioning = getConditioning(row) + lazy val date = getEventDate(row) + lazy val groupID = extractGroupId(row) + + classification.map(code => Drug(patientID, code, conditioning, groupID, date)) } + private def getPatientID(row: Row): String = row.getAs[String](ColNames.PatientId) + + private def getConditioning(row: Row): Int = row.getAs[Int](ColNames.Conditioning) + + private def getEventDate(row: Row): Timestamp = row.getAs[Timestamp](ColNames.Date) /** It generate a hash using the values of these columns - *(FLX_DIS_DTD,FLX_TRT_DTD,FLX_EMT_TYP,FLX_EMT_NUM,FLX_EMT_ORD,ORG_CLE_NUM,DCT_ORD_NUM). + * (FLX_DIS_DTD,FLX_TRT_DTD,FLX_EMT_TYP,FLX_EMT_NUM,FLX_EMT_ORD,ORG_CLE_NUM,DCT_ORD_NUM). * It allows to identify each prescription in a unique way, it can be used to identify * the possible interactions of molecules prescript in the same period. 
* @@ -58,29 +60,6 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { ).map(_.toChar).mkString } - - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = drugConfig.level.isInFamily(drugConfig.families, row) - - override def isInExtractorScope(row: Row): Boolean = true - - override def builder(row: Row): Seq[Event[Drug]] = { - lazy val classification = drugConfig.level.getClassification(drugConfig.families)(row) - - lazy val patientID = getPatientID(row) - lazy val conditioning = getConditioning(row) - lazy val date = getEventDate(row) - lazy val groupID = extractGroupId(row) - - classification.map(code => Drug(patientID, code, conditioning, groupID, date)) - } - - private def getPatientID(row: Row): String = row.getAs[String](ColNames.PatientId) - - private def getConditioning(row: Row): Int = row.getAs[Int](ColNames.Conditioning) - - private def getEventDate(row: Row): Timestamp = row.getAs[Timestamp](ColNames.Date) - override def getInput(sources: Sources): DataFrame = { val neededColumns: List[Column] = List( @@ -94,8 +73,6 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { lazy val irPhaR = sources.irPha.get lazy val dcir = sources.dcir.get - val spark: SparkSession = dcir.sparkSession - lazy val df: DataFrame = dcir.join(irPhaR, dcir.col("ER_PHA_F__PHA_PRS_C13") === irPhaR.col("PHA_CIP_C13")) df @@ -105,11 +82,6 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { } final object ColNames extends Serializable { - val PatientId = "patientID" - val Conditioning = "conditioning" - val Date = "eventDate" - val Cip13 = "CIP13" - lazy val FluxDate = "FLX_DIS_DTD" lazy val FluxProcessingDate = "FLX_TRT_DTD" lazy val EmitterType = "FLX_EMT_TYP" @@ -117,10 +89,17 @@ class DrugExtractor(drugConfig: DrugConfig) extends Extractor[Drug] { lazy val FluxSeqNumber = "FLX_EMT_ORD" lazy val OrganisationOldId = "ORG_CLE_NUM" lazy val OrganisationDecompteNumber = "DCT_ORD_NUM" - lazy val GroupID = List( 
FluxDate, FluxProcessingDate, EmitterType, EmitterId, FluxSeqNumber, OrganisationOldId, OrganisationDecompteNumber ) + val PatientId = "patientID" + val Conditioning = "conditioning" + val Date = "eventDate" + val Cip13 = "CIP13" } } + +object DrugExtractor { + def apply(drugConfig: DrugConfig): DrugExtractor = new DrugExtractor(drugConfig) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Cardiac.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Cardiac.scala index 23968369..ad62431f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Cardiac.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Cardiac.scala @@ -7,19 +7,19 @@ import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClass object Cardiac extends DrugClassConfig { override val name: String = "CardiacTherapy" override val cip13Codes: Set[String] = Set( - "3400933489045", - "3400930313374", - "3400930313206", - "3400930193945", - "3400931163411", - "3400932346554", - "3400933466091", - "3400930313893" + "3400933489045", + "3400930313374", + "3400930313206", + "3400930193945", + "3400931163411", + "3400932346554", + "3400933466091", + "3400930313893" ) - val cardiacGlycosides = new PharmacologicalClassConfig( name = "CardiacGlycosides", ATCCodes = List("C01AA*") ) override val pharmacologicalClasses: List[PharmacologicalClassConfig] = List(cardiacGlycosides) + } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Opioids.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Opioids.scala index 8e05e086..9dc60a3d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Opioids.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Opioids.scala @@ -7,474 +7,474 @@ import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClass object Opioids extends DrugClassConfig { override val name: String = "Opioids" override val cip13Codes: Set[String] = Set( - "3400938747393", - "3400939023649", - "3400935770073", - "3400936114548", - "3400936114487", - "3400934828096", - "3400937479462", - "3400937489928", - "3400937484145", - "3400934992308", - "3400934990007", - "3400934951374", - "3400930378540", - "3400933715014", - "3400935641281", - "3400930411469", - "3400927319143", - "3400936853690", - "3400934314797", - "3400934314568", - "3400934314278", - "3400934808166", - "3400934802362", - "3400936102132", - "3400926845926", - "3400938229783", - "3400936853812", - "3400935793195", - "3400926980801", - "3400926797348", - "3400936319714", - "3400927671029", - "3400939104874", - "3400935877154", - "3400935106704", - "3400934538193", - "3400939221601", - "3400939221540", - "3400935620699", - "3400936907553", - "3400934991295", - "3400935620170", - "3400930411230", - "3400931959328", - "3400936206830", - "3400936248182", - "3400936247123", - "3400936970366", - "3400935067340", - "3400935349897", - "3400938127676", - "3400938227024", - "3400931959038", - "3400931958956", - "3400935486554", - "3400931959496", - "3400935156440", - "3400935155788", - "3400938231045", - "3400936969247", - "3400936968936", - "3400936672598", - "3400935438478", - "3400927321214", - "3400938228373", - "3400936247642", - "3400936206779", - "3400936102651", - "3400927607035", - "3400927562396", - "3400927597237", - "3400935888730", - "3400936289406", - "3400936289284", - "3400938399097", - "3400939826240", - "3400936969995", - "3400938222920", - "3400936812420", - "3400936810709", - "3400936809758", - "3400936748804", - "3400933220754", - "3400933304652", - "3400933319090", - "3400934976339", - "3400934976278", - "3400936041295", 
- "3400936041417", - "3400938212983", - "3400933316778", - "3400935486783", - "3400935404640", - "3400934499760", - "3400936289635", - "3400935567031", - "3400935438249", - "3400922096032", - "3400922096322", - "3400936141391", - "3400935748713", - "3400935748652", - "3400936787193", - "3400934654329", - "3400934654039", - "3400938552454", - "3400935350039", - "3400937356473", - "3400938675443", - "3400938675214", - "3400935349958", - "3400937356305", - "3400937700177", - "3400935856913", - "3400934654787", - "3400934654558", - "3400930303320", - "3400934007286", - "3400935194121", - "3400934882845", - "3400935877505", - "3400935193810", - "3400937051323", - "3400937015943", - "3400933180850", - "3400930002285", - "3400930002278", - "3400926690908", - "3400926963422", - "3400938459852", - "3400939846460", - "3400934890659", - "3400939392325", - "3400934890888", - "3400933911881", - "3400934802133", - "3400938042887", - "3400938042597", - "3400939903392", - "3400939641706", - "3400939711874", - "3400938042139", - "3400921856996", - "3400921856828", - "3400922198088", - "3400921857658", - "3400921879599", - "3400949251087", - "3400949250547", - "3400949914821", - "3400949217540", - "3400949217250", - "3400949217199", - "3400930332016", - "3400939202099", - "3400939641355", - "3400939640983", - "3400939711935", - "3400938510232", - "3400927560446", - "3400927658532", - "3400927657702", - "3400934410932", - "3400934399435", - "3400934399084", - "3400934398544", - "3400934291463", - "3400934291234", - "3400934387609", - "3400934387258", - "3400927656989", - "3400927748752", - "3400927760587", - "3400927759239", - "3400927757976", - "3400939220710", - "3400938509342", - "3400938509861", - "3400934890420", - "3400932869947", - "3400927659591", - "3400938458732", - "3400935438300", - "3400936587281", - "3400936985155", - "3400935695161", - "3400936911635", - "3400936911055", - "3400936910683", - "3400939200668", - "3400939104355", - "3400938460223", - "3400938533415", - 
"3400938509113", - "3400938124255", - "3400939024479", - "3400938398618", - "3400926961121", - "3400939104584", - "3400939104416", - "3400939105765", - "3400939105185", - "3400939104935", - "3400939104706", - "3400934053702", - "3400938508741", - "3400938508512", - "3400938652840", - "3400938652321", - "3400927756337", - "3400927753725", - "3400927751653", - "3400939221021", - "3400927755095", - "3400931164531", - "3400934238536", - "3400934238307", - "3400934021305", - "3400934641541", - "3400922301938", - "3400934238475", - "3400936910515", - "3400936102422", - "3400936894563", - "3400934866067", - "3400935322982", - "3400922096261", - "3400935349729", - "3400949363940", - "3400939190051", - "3400936907782", - "3400936906891", - "3400936906372", - "3400936906143", - "3400936905771", - "3400935807366", - "3400935806826", - "3400927890031", - "3400938651089", - "3400938650488", - "3400938225532", - "3400936794238", - "3400926942489", - "3400935857392", - "3400935154729", - "3400939200439", - "3400934387487", - "3400933684792", - "3400934748042", - "3400939104645", - "3400939221489", - "3400939221311", - "3400939221250", - "3400939221199", - "3400939220888", - "3400939213507", - "3400939213446", - "3400932952724", - "3400932870028", - "3400932869718", - "3400922303420", - "3400922303079", - "3400922302768", - "3400922302300", - "3400921856477", - "3400935703477", - "3400930777473", - "3400935018663", - "3400930777534", - "3400933323813", - "3400933323752", - "3400935130594", - "3400939186788", - "3400936910454", - "3400936910225", - "3400949217489", - "3400939187679", - "3400935998651", - "3400932869886", - "3400949914531", - "3400934890130", - "3400927560965", - "3400927561337", - "3400927888601", - "3400935806307", - "3400935349378", - "3400939185668", - "3400939726205", - "3400939725192", - "3400935619921", - "3400935844217", - "3400935843845", - "3400936203068", - "3400936242388", - "3400936242159", - "3400936241909", - "3400935422279", - "3400927656750", - 
"3400939314952", - "3400939712017", - "3400921857948", - "3400949251209", - "3400949378678", - "3400934827846", - "3400922095950", - "3400936809178", - "3400930068571", - "3400921857139", - "3400949812134", - "3400949666621", - "3400949666331", - "3400949915590", - "3400933323691", - "3400933323523", - "3400931164821", - "3400939391144", - "3400939390543", - "3400939342757", - "3400939844510", - "3400930068519", - "3400930068649", - "3400921857887", - "3400921857719", - "3400921857597", - "3400921856767", - "3400921856538", - "3400921857368", - "3400921857078", - "3400949914302", - "3400938504897", - "3400935429483", - "3400935421500", - "3400933480059", - "3400933479978", - "3400933799229", - "3400927889721", - "3400927889370", - "3400927889080", - "3400927561108", - "3400939827070", - "3400935595874", - "3400935615558", - "3400936651548", - "3400934760228", - "3400939755588", - "3400939755878", - "3400939476803", - "3400939417899", - "3400939417370", - "3400932551897", - "3400930075722", - "3400933803681", - "3400939478173", - "3400939476223", - "3400939479415", - "3400935714244", - "3400939118833", - "3400926939359", - "3400927656699", - "3400935509369", - "3400936587632", - "3400930051047", - "3400933803452", - "3400939477404", - "3400939825649", - "3400930057834", - "3400932966332", - "3400935185884", - "3400935107534", - "3400935660091", - "3400935108074", - "3400935107183", - "3400935671677", - "3400930068960", - "3400930068892", - "3400927657641", - "3400926845117", - "3400927656811", - "3400927658471", - "3400927655630", - "3400927655920", - "3400927659423", - "3400930068823", - "3400935982452", - "3400927656002", - "3400930076033", - "3400930075937", - "3400930075623", - "3400937847698", - "3400934827617", - "3400935703767", - "3400930587508", - "3400935065100", - "3400935703248", - "3400921879711", - "3400936969537", - "3400936595965", - "3400949214587", - "3400936819344", - "3400935531650", - "3400931308492", - "3400939478814", - "3400939213736", - 
"3400921857429", - "3400936289055", - "3400935235893", - "3400930075388", - "3400934170195", - "3400933305314", - "3400932461219", - "3400939391892", - "3400939843629", - "3400927660313", - "3400927560675", - "3400930587737", - "3400935299130", - "3400938324747", - "3400933724467", - "3400938149500", - "3400936690349", - "3400935157041", - "3400935236036", - "3400936968646", - "3400937374514", - "3400938509571", - "3400927513541", - "3400935414007", - "3400926721466", - "3400926838072", - "3400935583239", - "3400935404701", - "3400935565839", - "3400930014103", - "3400930045350", - "3400939699257", - "3400938458442", - "3400935420909", - "3400927655869", - "3400935713063", - "3400933275815", - "3400937015653", - "3400949217311", - "3400939200729", - "3400934300660", - "3400936212053", - "3400938125955", - "3400935570970", - "3400936289925", - "3400939845340", - "3400936906204", - "3400936105034", - "3400939723471", - "3400935666826", - "3400935843494", - "3400935768872", - "3400938508970", - "3400938651720", - "3400936853751", - "3400939104294", - "3400934238765", - "3400949363599", - "3400939220949", - "3400933765910", - "3400927660252", - "3400935248442", - "3400926943141", - "3400935694911", - "3400939185897" + "3400938747393", + "3400939023649", + "3400935770073", + "3400936114548", + "3400936114487", + "3400934828096", + "3400937479462", + "3400937489928", + "3400937484145", + "3400934992308", + "3400934990007", + "3400934951374", + "3400930378540", + "3400933715014", + "3400935641281", + "3400930411469", + "3400927319143", + "3400936853690", + "3400934314797", + "3400934314568", + "3400934314278", + "3400934808166", + "3400934802362", + "3400936102132", + "3400926845926", + "3400938229783", + "3400936853812", + "3400935793195", + "3400926980801", + "3400926797348", + "3400936319714", + "3400927671029", + "3400939104874", + "3400935877154", + "3400935106704", + "3400934538193", + "3400939221601", + "3400939221540", + "3400935620699", + "3400936907553", + 
"3400934991295", + "3400935620170", + "3400930411230", + "3400931959328", + "3400936206830", + "3400936248182", + "3400936247123", + "3400936970366", + "3400935067340", + "3400935349897", + "3400938127676", + "3400938227024", + "3400931959038", + "3400931958956", + "3400935486554", + "3400931959496", + "3400935156440", + "3400935155788", + "3400938231045", + "3400936969247", + "3400936968936", + "3400936672598", + "3400935438478", + "3400927321214", + "3400938228373", + "3400936247642", + "3400936206779", + "3400936102651", + "3400927607035", + "3400927562396", + "3400927597237", + "3400935888730", + "3400936289406", + "3400936289284", + "3400938399097", + "3400939826240", + "3400936969995", + "3400938222920", + "3400936812420", + "3400936810709", + "3400936809758", + "3400936748804", + "3400933220754", + "3400933304652", + "3400933319090", + "3400934976339", + "3400934976278", + "3400936041295", + "3400936041417", + "3400938212983", + "3400933316778", + "3400935486783", + "3400935404640", + "3400934499760", + "3400936289635", + "3400935567031", + "3400935438249", + "3400922096032", + "3400922096322", + "3400936141391", + "3400935748713", + "3400935748652", + "3400936787193", + "3400934654329", + "3400934654039", + "3400938552454", + "3400935350039", + "3400937356473", + "3400938675443", + "3400938675214", + "3400935349958", + "3400937356305", + "3400937700177", + "3400935856913", + "3400934654787", + "3400934654558", + "3400930303320", + "3400934007286", + "3400935194121", + "3400934882845", + "3400935877505", + "3400935193810", + "3400937051323", + "3400937015943", + "3400933180850", + "3400930002285", + "3400930002278", + "3400926690908", + "3400926963422", + "3400938459852", + "3400939846460", + "3400934890659", + "3400939392325", + "3400934890888", + "3400933911881", + "3400934802133", + "3400938042887", + "3400938042597", + "3400939903392", + "3400939641706", + "3400939711874", + "3400938042139", + "3400921856996", + "3400921856828", + "3400922198088", + 
"3400921857658", + "3400921879599", + "3400949251087", + "3400949250547", + "3400949914821", + "3400949217540", + "3400949217250", + "3400949217199", + "3400930332016", + "3400939202099", + "3400939641355", + "3400939640983", + "3400939711935", + "3400938510232", + "3400927560446", + "3400927658532", + "3400927657702", + "3400934410932", + "3400934399435", + "3400934399084", + "3400934398544", + "3400934291463", + "3400934291234", + "3400934387609", + "3400934387258", + "3400927656989", + "3400927748752", + "3400927760587", + "3400927759239", + "3400927757976", + "3400939220710", + "3400938509342", + "3400938509861", + "3400934890420", + "3400932869947", + "3400927659591", + "3400938458732", + "3400935438300", + "3400936587281", + "3400936985155", + "3400935695161", + "3400936911635", + "3400936911055", + "3400936910683", + "3400939200668", + "3400939104355", + "3400938460223", + "3400938533415", + "3400938509113", + "3400938124255", + "3400939024479", + "3400938398618", + "3400926961121", + "3400939104584", + "3400939104416", + "3400939105765", + "3400939105185", + "3400939104935", + "3400939104706", + "3400934053702", + "3400938508741", + "3400938508512", + "3400938652840", + "3400938652321", + "3400927756337", + "3400927753725", + "3400927751653", + "3400939221021", + "3400927755095", + "3400931164531", + "3400934238536", + "3400934238307", + "3400934021305", + "3400934641541", + "3400922301938", + "3400934238475", + "3400936910515", + "3400936102422", + "3400936894563", + "3400934866067", + "3400935322982", + "3400922096261", + "3400935349729", + "3400949363940", + "3400939190051", + "3400936907782", + "3400936906891", + "3400936906372", + "3400936906143", + "3400936905771", + "3400935807366", + "3400935806826", + "3400927890031", + "3400938651089", + "3400938650488", + "3400938225532", + "3400936794238", + "3400926942489", + "3400935857392", + "3400935154729", + "3400939200439", + "3400934387487", + "3400933684792", + "3400934748042", + "3400939104645", + 
"3400939221489", + "3400939221311", + "3400939221250", + "3400939221199", + "3400939220888", + "3400939213507", + "3400939213446", + "3400932952724", + "3400932870028", + "3400932869718", + "3400922303420", + "3400922303079", + "3400922302768", + "3400922302300", + "3400921856477", + "3400935703477", + "3400930777473", + "3400935018663", + "3400930777534", + "3400933323813", + "3400933323752", + "3400935130594", + "3400939186788", + "3400936910454", + "3400936910225", + "3400949217489", + "3400939187679", + "3400935998651", + "3400932869886", + "3400949914531", + "3400934890130", + "3400927560965", + "3400927561337", + "3400927888601", + "3400935806307", + "3400935349378", + "3400939185668", + "3400939726205", + "3400939725192", + "3400935619921", + "3400935844217", + "3400935843845", + "3400936203068", + "3400936242388", + "3400936242159", + "3400936241909", + "3400935422279", + "3400927656750", + "3400939314952", + "3400939712017", + "3400921857948", + "3400949251209", + "3400949378678", + "3400934827846", + "3400922095950", + "3400936809178", + "3400930068571", + "3400921857139", + "3400949812134", + "3400949666621", + "3400949666331", + "3400949915590", + "3400933323691", + "3400933323523", + "3400931164821", + "3400939391144", + "3400939390543", + "3400939342757", + "3400939844510", + "3400930068519", + "3400930068649", + "3400921857887", + "3400921857719", + "3400921857597", + "3400921856767", + "3400921856538", + "3400921857368", + "3400921857078", + "3400949914302", + "3400938504897", + "3400935429483", + "3400935421500", + "3400933480059", + "3400933479978", + "3400933799229", + "3400927889721", + "3400927889370", + "3400927889080", + "3400927561108", + "3400939827070", + "3400935595874", + "3400935615558", + "3400936651548", + "3400934760228", + "3400939755588", + "3400939755878", + "3400939476803", + "3400939417899", + "3400939417370", + "3400932551897", + "3400930075722", + "3400933803681", + "3400939478173", + "3400939476223", + "3400939479415", + 
"3400935714244", + "3400939118833", + "3400926939359", + "3400927656699", + "3400935509369", + "3400936587632", + "3400930051047", + "3400933803452", + "3400939477404", + "3400939825649", + "3400930057834", + "3400932966332", + "3400935185884", + "3400935107534", + "3400935660091", + "3400935108074", + "3400935107183", + "3400935671677", + "3400930068960", + "3400930068892", + "3400927657641", + "3400926845117", + "3400927656811", + "3400927658471", + "3400927655630", + "3400927655920", + "3400927659423", + "3400930068823", + "3400935982452", + "3400927656002", + "3400930076033", + "3400930075937", + "3400930075623", + "3400937847698", + "3400934827617", + "3400935703767", + "3400930587508", + "3400935065100", + "3400935703248", + "3400921879711", + "3400936969537", + "3400936595965", + "3400949214587", + "3400936819344", + "3400935531650", + "3400931308492", + "3400939478814", + "3400939213736", + "3400921857429", + "3400936289055", + "3400935235893", + "3400930075388", + "3400934170195", + "3400933305314", + "3400932461219", + "3400939391892", + "3400939843629", + "3400927660313", + "3400927560675", + "3400930587737", + "3400935299130", + "3400938324747", + "3400933724467", + "3400938149500", + "3400936690349", + "3400935157041", + "3400935236036", + "3400936968646", + "3400937374514", + "3400938509571", + "3400927513541", + "3400935414007", + "3400926721466", + "3400926838072", + "3400935583239", + "3400935404701", + "3400935565839", + "3400930014103", + "3400930045350", + "3400939699257", + "3400938458442", + "3400935420909", + "3400927655869", + "3400935713063", + "3400933275815", + "3400937015653", + "3400949217311", + "3400939200729", + "3400934300660", + "3400936212053", + "3400938125955", + "3400935570970", + "3400936289925", + "3400939845340", + "3400936906204", + "3400936105034", + "3400939723471", + "3400935666826", + "3400935843494", + "3400935768872", + "3400938508970", + "3400938651720", + "3400936853751", + "3400939104294", + "3400934238765", + 
"3400949363599", + "3400939220949", + "3400933765910", + "3400927660252", + "3400935248442", + "3400926943141", + "3400935694911", + "3400939185897" ) override val pharmacologicalClasses: List[PharmacologicalClassConfig] = List.empty } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/ProtonPumpInhibitors.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/ProtonPumpInhibitors.scala index 1ff4a3db..817560fb 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/ProtonPumpInhibitors.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/ProtonPumpInhibitors.scala @@ -4,965 +4,965 @@ package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} -object ProtonPumpInhibitors extends DrugClassConfig{ +object ProtonPumpInhibitors extends DrugClassConfig { override val name: String = "ProtonPumpInhibitors" override val cip13Codes: Set[String] = Set( - "3400949001279", - "3400938104905", - "3400938268706", - "3400938278170", - "3400938277920", - "3400938277579", - "3400938277340", - "3400949766208", - "3400949015856", - "3400949015795", - "3400949337118", - "3400949336456", - "3400949001637", - "3400949011834", - "3400938103205", - "3400949011544", - "3400936291706", - "3400936291584", - "3400936290754", - "3400936287792", - "3400936288515", - "3400936287914", - "3400934868139", - "3400941787461", - "3400949337286", - "3400927818172", - "3400927823206", - "3400922392486", - "3400939074566", - "3400936747333", - "3400936884397", - "3400949014217", - "3400938104035", - "3400938144307", - "3400949001064", - "3400949498765", - "3400941889578", - "3400949000500", - "3400949001057", - "3400941788642", - "3400941783388", - "3400949001033", - "3400949001026", - 
"3400949001019", - "3400938954401", - "3400937530880", - "3400936483163", - "3400936676732", - "3400936674431", - "3400936673779", - "3400936673250", - "3400921838329", - "3400937827997", - "3400938782462", - "3400936747272", - "3400921854817", - "3400938112887", - "3400949946419", - "3400949968879", - "3400949941155", - "3400938106107", - "3400941859014", - "3400927481741", - "3400939307688", - "3400921850215", - "3400949117154", - "3400949117093", - "3400949116263", - "3400941902109", - "3400941889349", - "3400941888168", - "3400936887411", - "3400936887350", - "3400936884168", - "3400936883918", - "3400936883857", - "3400936805446", - "3400936805217", - "3400936676961", - "3400921992304", - "3400949004799", - "3400949003440", - "3400941787522", - "3400941607981", - "3400941784569", - "3400921994773", - "3400949008643", - "3400936804784", - "3400941904868", - "3400938119343", - "3400930003060", - "3400938142693", - "3400938140163", - "3400938112139", - "3400938112078", - "3400938109986", - "3400938109696", - "3400936483514", - "3400941611834", - "3400941610073", - "3400941609244", - "3400941608292", - "3400949008124", - "3400949007523", - "3400938106336", - "3400949015917", - "3400936747562", - "3400935674289", - "3400936281820", - "3400935842374", - "3400935974419", - "3400936284432", - "3400936886759", - "3400949001330", - "3400949000517", - "3400939213156", - "3400939524689", - "3400936883796", - "3400936282940", - "3400939072784", - "3400922389875", - "3400938118452", - "3400936804036", - "3400936483682", - "3400938117103", - "3400949761005", - "3400949946358", - "3400949001354", - "3400949001347", - "3400941781209", - "3400941781087", - "3400941780837", - "3400938117790", - "3400922489223", - "3400927607493", - "3400926663872", - "3400949699858", - "3400949970889", - "3400936288744", - "3400934081217", - "3400934130106", - "3400934052750", - "3400934455186", - "3400934455247", - "3400926957278", - "3400927568022", - "3400927567711", - "3400927567650", - 
"3400922296005", - "3400934415555", - "3400934415494", - "3400930011638", - "3400921927948", - "3400921928198", - "3400921848083", - "3400921847543", - "3400926955786", - "3400926954086", - "3400935585011", - "3400930060735", - "3400930060704", - "3400922295923", - "3400922296463", - "3400922197609", - "3400921836776", - "3400921611212", - "3400921633672", - "3400933839352", - "3400934869259", - "3400934869020", - "3400933839291", - "3400933839871", - "3400930011515", - "3400930002926", - "3400930011508", - "3400930060193", - "3400926823405", - "3400926823283", - "3400934963841", - "3400934475757", - "3400934868429", - "3400922197548", - "3400922197319", - "3400922403526", - "3400922095431", - "3400930060179", - "3400930060698", - "3400930060247", - "3400921636345", - "3400921634914", - "3400935674401", - "3400936862746", - "3400949762644", - "3400949719181", - "3400926816889", - "3400926815530", - "3400926957568", - "3400927359583", - "3400927359415", - "3400922037820", - "3400922037530", - "3400922037479", - "3400939030272", - "3400933341213", - "3400933341152", - "3400936323094", - "3400935533951", - "3400930066409", - "3400930066386", - "3400930066379", - "3400935585479", - "3400935584939", - "3400935584588", - "3400935533432", - "3400935533203", - "3400935585301", - "3400922037240", - "3400922036069", - "3400922034287", - "3400922033808", - "3400935584298", - "3400934706639", - "3400934081095", - "3400921849844", - "3400921852516", - "3400921852165", - "3400935584700", - "3400935584410", - "3400936282360", - "3400937991339", - "3400930070604", - "3400930070598", - "3400930070574", - "3400934456077", - "3400936871731", - "3400936871670", - "3400936871441", - "3400934410413", - "3400934303562", - "3400934081446", - "3400921928259", - "3400921995954", - "3400921994315", - "3400921998627", - "3400922000459", - "3400921993653", - "3400921992823", - "3400949766376", - "3400936668119", - "3400936282889", - "3400936805965", - "3400922463438", - "3400926955908", - 
"3400939389653", - "3400938778502", - "3400938776379", - "3400949721542", - "3400949721481", - "3400938991598", - "3400938277869", - "3400927932694", - "3400936288683", - "3400949766666", - "3400949718061", - "3400949755370", - "3400949494514", - "3400949759804", - "3400949495573", - "3400927823374", - "3400949718412", - "3400949718351", - "3400949702091", - "3400949701612", - "3400949494743", - "3400949757152", - "3400949758791", - "3400949758562", - "3400949757381", - "3400949487189", - "3400949970421", - "3400949946297", - "3400949946129", - "3400927931925", - "3400927931116", - "3400927930683", - "3400927871399", - "3400927871221", - "3400927929854", - "3400935881694", - "3400939307749", - "3400939307510", - "3400939255439", - "3400939541778", - "3400939271286", - "3400939322018", - "3400949754021", - "3400938703979", - "3400938703740", - "3400939075228", - "3400939030623", - "3400939073095", - "3400939030562", - "3400938117561", - "3400938117271", - "3400938119633", - "3400938119572", - "3400938112948", - "3400937827188", - "3400938118681", - "3400939321646", - "3400936287105", - "3400936287044", - "3400934549021", - "3400934706868", - "3400934705519", - "3400934097935", - "3400939030333", - "3400934455766", - "3400938991710", - "3400934705229", - "3400941784279", - "3400939072555", - "3400949003099", - "3400941787751", - "3400934130045", - "3400927931284", - "3400927929915", - "3400927366796", - "3400926665654", - "3400927418358", - "3400939239699", - "3400939321936", - "3400937991278", - "3400949719532", - "3400949718870", - "3400930011669", - "3400927607264", - "3400935533371", - "3400949968411", - "3400936284661", - "3400938193503", - "3400938108866", - "3400934606533", - "3400938104844", - "3400922402406", - "3400949753949", - "3400949718122", - "3400949498536", - "3400936287273", - "3400949755202", - "3400949719471", - "3400926953775", - "3400949946068", - "3400949945986", - "3400949946877", - "3400949946709", - "3400949946648", - "3400949941445", - 
"3400922462776", - "3400922465210", - "3400926668327", - "3400926667894", - "3400926661281", - "3400926663414", - "3400939541198", - "3400939320816", - "3400939320755", - "3400939213385", - "3400939213217", - "3400939213095", - "3400939239989", - "3400949767786", - "3400939030104", - "3400939072326", - "3400938781052", - "3400938780802", - "3400938778380", - "3400938785364", - "3400949939893", - "3400938118513", - "3400938118162", - "3400938194333", - "3400937974998", - "3400938194104", - "3400938193732", - "3400939541549", - "3400938784015", - "3400938186529", - "3400922403816", - "3400926951306", - "3400935514790", - "3400936290525", - "3400936282421", - "3400936668287", - "3400934304163", - "3400927607615", - "3400934415784", - "3400934475696", - "3400922491233", - "3400922490281", - "3400927417818", - "3400939322186", - "3400937530941", - "3400941902048", - "3400949026272", - "3400949014156", - "3400926661632", - "3400922489452", - "3400938407525", - "3400921634792", - "3400941783159", - "3400930060223", - "3400949000838", - "3400930011539", - "3400934097126", - "3400922296173", - "3400949769278", - "3400949769049", - "3400949767908", - "3400949498246", - "3400949498017", - "3400949762415", - "3400949000494", - "3400927357572", - "3400922491691", - "3400922491462", - "3400922490403", - "3400927824326", - "3400927823435", - "3400949721771", - "3400939257679", - "3400939257440", - "3400939541488", - "3400939540948", - "3400939656953", - "3400939656724", - "3400949761463", - "3400938778151", - "3400938776140", - "3400938993660", - "3400935941879", - "3400937828369", - "3400937828598", - "3400927357343", - "3400927352898", - "3400926766870", - "3400926766351", - "3400927607325", - "3400927481512", - "3400939389882", - "3400937828079", - "3400939075167", - "3400939080710", - "3400941785511", - "3400922036298", - "3400927357404", - "3400936482852", - "3400927822544", - "3400926766580", - "3400922033976", - "3400926822163", - "3400926815998", - "3400927818004", - 
"3400927817922", - "3400927822483", - "3400936482913", - "3400949721023", - "3400949720880", - "3400949720651", - "3400949719242", - "3400949721313", - "3400949718702", - "3400927417986", - "3400938785074", - "3400949761173", - "3400949717811", - "3400949717699", - "3400949765256", - "3400949765027", - "3400927821714", - "3400927821653", - "3400927800481", - "3400927568190", - "3400949718580", - "3400939388472", - "3400939388243", - "3400936323216", - "3400934476068", - "3400938408416", - "3400939816074", - "3400949763535", - "3400949760053", - "3400937827829", - "3400937827768", - "3400938116731", - "3400938116502", - "3400938113600", - "3400927481451", - "3400927481390", - "3400927418129", - "3400927418068", - "3400927417757", - "3400927933295", - "3400927933127", - "3400939815763", - "3400938113259", - "3400938113020", - "3400939079998", - "3400921609370", - "3400938175172", - "3400949339648", - "3400936324336", - "3400921636574", - "3400921628999", - "3400949341948", - "3400941657320", - "3400941896613", - "3400936806627", - "3400921607949", - "3400941895371", - "3400921793390", - "3400927932175", - "3400936290235", - "3400921851335", - "3400936484115", - "3400927930973", - "3400936673830", - "3400921792560", - "3400936281240", - "3400934615528", - "3400936325975", - "3400949766727", - "3400949766437", - "3400936672949", - "3400949011773", - "3400949022311", - "3400949022250", - "3400949341887", - "3400949341719", - "3400949341368", - "3400949341078", - "3400941904400", - "3400949016686", - "3400949016457", - "3400949623228", - "3400941659041", - "3400941658969", - "3400941895661", - "3400941894312", - "3400941893131", - "3400941892998", - "3400941892820", - "3400949338986", - "3400949338818", - "3400941658730", - "3400941657498", - "3400949943456", - "3400949021598", - "3400949010073", - "3400921636284", - "3400949943395", - "3400930000755", - "3400927873690", - "3400949026562", - "3400949026333", - "3400949015047", - "3400949014965", - "3400949014675", - 
"3400921627701", - "3400921627589", - "3400949011605", - "3400949948079", - "3400921849035", - "3400921609660", - "3400927368110", - "3400934455827", - "3400935583987", - "3400927930805", - "3400939257389", - "3400939307220", - "3400938174342", - "3400921611502", - "3400949948130", - "3400926802462", - "3400936281189", - "3400926814410", - "3400935584069", - "3400936480551", - "3400949948369", - "3400949339709", - "3400939146348", - "3400949022021", - "3400939154503", - "3400939151250", - "3400936324626", - "3400922037301", - "3400921790559", - "3400939655482", - "3400936325227", - "3400927884467", - "3400949014736", - "3400949016518", - "3400949947997", - "3400938105155", - "3400938140224", - "3400922471761", - "3400938111828", - "3400936325807", - "3400936325395", - "3400936325166", - "3400937974769", - "3400938268416", - "3400936480612", - "3400936583610", - "3400936677043", - "3400936674080", - "3400936673540", - "3400938140453", - "3400936484283", - "3400936673199", - "3400936673021", - "3400936672710", - "3400936388611", - "3400936388550", - "3400936480490", - "3400936672659", - "3400936668577", - "3400938174922", - "3400938174052", - "3400936668409", - "3400936668348", - "3400939080888", - "3400939389592", - "3400933839932", - "3400939657325", - "3400939151779", - "3400939657264", - "3400939657035", - "3400939154732", - "3400939154442", - "3400939151601", - "3400936356153", - "3400939151489", - "3400939321707", - "3400939145228", - "3400939307398", - "3400938119282", - "3400938119114", - "3400939146287", - "3400939080130", - "3400938119053", - "3400939154213", - "3400939154091", - "3400934095924", - "3400935533661", - "3400935533142", - "3400934303913", - "3400935533081", - "3400936280939", - "3400936290983", - "3400936290006", - "3400936034372", - "3400927933417", - "3400922390826", - "3400921623796", - "3400921793222", - "3400921901405", - "3400936323674", - "3400921640137", - "3400921639995", - "3400921638707", - "3400921633443", - "3400921633382", - 
"3400921604818", - "3400926802004", - "3400926800802", - "3400926813529", - "3400926813178", - "3400921626698", - "3400921626469", - "3400926664992", - "3400926662813", - "3400927931406", - "3400927884238", - "3400927884009", - "3400921848953", - "3400921780093", - "3400922471532", - "3400921638646", - "3400921637694", - "3400949948420", - "3400927930515", - "3400927930393", - "3400927871689", - "3400921851274", - "3400921779493", - "3400921779325", - "3400921623567", - "3400921611380", - "3400921608199", - "3400936583498", - "3400936672888", - "3400922296234", - "3400927800832", - "3400921607888", - "3400921622447", - "3400921606010", - "3400922392196", - "3400922393438", - "3400922393087", - "3400921632552", - "3400922000510", - "3400922000398", - "3400921906837", - "3400936324565", - "3400921632323", - "3400921630251", - "3400921629941", - "3400927932236", - "3400927920639", - "3400930002896", - "3400930000724", - "3400930003039", - "3400922470702", - "3400930015384", - "3400934475528", - "3400935533722", - "3400938268584", - "3400938103953", - "3400936673311", - "3400941610424", - "3400941788413", - "3400949117963", - "3400936483743", - "3400936673601", - "3400938785654", - "3400921928020", - "3400922036359", - "3400936291645", - "3400949004621", - "3400935514561", - "3400930027288", - "3400949946587", - "3400949116324", - "3400941888458", - "3400936887299", - "3400936805385", - "3400949337347", - "3400941785689", - "3400938105445", - "3400936805736", - "3400941609824", - "3400921623338", - "3400949336517", - "3400939524450", - "3400936290815", - "3400936287853", - "3400938108576", - "3400922033747", - "3400949763993", - "3400949721191", - "3400939255729", - "3400939239521", - "3400936290464", - "3400926951535", - "3400949700202", - "3400949755141", - "3400938673081", - "3400939072845", - "3400949007691", - "3400936804845", - "3400938954340", - "3400936862685", - "3400936281530", - "3400936286214", - "3400941906299", - "3400949118793", - "3400926822453", - 
"3400936674370", - "3400937991568", - "3400934707230", - "3400934096754", - "3400934606182", - "3400938118223", - "3400937531191", - "3400934052460", - "3400939320694", - "3400939239811", - "3400939656892", - "3400921928310", - "3400926765750", - "3400949720712", - "3400949719013", - "3400936323155", - "3400939815824", - "3400938139853", - "3400938109757", - "3400941888229", - "3400927824494", - "3400949001323", - "3400949001286", - "3400936674202", - "3400936357624", - "3400936357334", - "3400936357273", - "3400939074276", - "3400930066423", - "3400941859533", - "3400926814700", - "3400937827539", - "3400949498994", - "3400949721832", - "3400938782004", - "3400922037769", - "3400935533890", - "3400935584878", - "3400938784305", - "3400922491752", - "3400927846847", - "3400922034348", - "3400930070567", - "3400936871380", - "3400927932526", - "3400922197487", - "3400949946938", - "3400927931864", - "3400927870910", - "3400934410642", - "3400949940264", - "3400938117912", - "3400938186758", - "3400949001644", - "3400921854466", - "3400927870859", - "3400949768967", - "3400949495863", - "3400938993899", - "3400937828130", - "3400922035987", - "3400934096006", - "3400927817861", - "3400927821592", - "3400926955847", - "3400930060728", - "3400936886810", - "3400927352959", - "3400926954376", - "3400949717750", - "3400949763764", - "3400938117042", - "3400927607554", - "3400934303333", - "3400921995664", - "3400921994025", - "3400938113549", - "3400934475818", - "3400934098017", - "3400927568251", - "3400922197777", - "3400934963261", - "3400922146850", - "3400926817190", - "3400927366338", - "3400927481680", - "3400927418419", - "3400938782172", - "3400938105094", - "3400949758333", - "3400949947010", - "3400938119862", - "3400937827300", - "3400921837896", - "3400921836547", - "3400930060216", - "3400922465678", - "3400926666026", - "3400927801082", - "3400930027301", - "3400935584120", - "3400930060711", - "3400934548420", - "3400921605877", - "3400949009763", - 
"3400938991420", - "3400922470870", - "3400949026104", - "3400949021949", - "3400939152080", - "3400941906060", - "3400949022199", - "3400939655543", - "3400934417566", - "3400941896842", - "3400938113488", - "3400949341139", - "3400941895203", - "3400937974820", - "3400935973467", - "3400934601910", - "3400921628760", - "3400927932984", - "3400939307459", - "3400921604757", - "3400941904578", - "3400938174113", - "3400936677104", - "3400936673489", - "3400936325746", - "3400936484344", - "3400936668638", - "3400941894190", - "3400936668058", - "3400936290693", - "3400936290174", - "3400939146577", - "3400936803954", - "3400949943517", - "3400939144917", - "3400939079820", - "3400938194043", - "3400935532831", - "3400936388499", - "3400927933585", - "3400934417337", - "3400936674141", - "3400922389585", - "3400927368578", - "3400927800542", - "3400927801143", - "3400921792621", - "3400936356443", - "3400936356214", - "3400936323964", - "3400936323735", - "3400936583559", - "3400921779905", - "3400927932816", - "3400921622218", - "3400921638936", - "3400921605068", - "3400926801113", - "3400934096983", - "3400927931574", - "3400927883927", - "3400921637816", - "3400927930225", - "3400922393148", - "3400921630022", - "3400922392028", - "3400922391137", - "3400921627411" + "3400949001279", + "3400938104905", + "3400938268706", + "3400938278170", + "3400938277920", + "3400938277579", + "3400938277340", + "3400949766208", + "3400949015856", + "3400949015795", + "3400949337118", + "3400949336456", + "3400949001637", + "3400949011834", + "3400938103205", + "3400949011544", + "3400936291706", + "3400936291584", + "3400936290754", + "3400936287792", + "3400936288515", + "3400936287914", + "3400934868139", + "3400941787461", + "3400949337286", + "3400927818172", + "3400927823206", + "3400922392486", + "3400939074566", + "3400936747333", + "3400936884397", + "3400949014217", + "3400938104035", + "3400938144307", + "3400949001064", + "3400949498765", + "3400941889578", + 
"3400949000500", + "3400949001057", + "3400941788642", + "3400941783388", + "3400949001033", + "3400949001026", + "3400949001019", + "3400938954401", + "3400937530880", + "3400936483163", + "3400936676732", + "3400936674431", + "3400936673779", + "3400936673250", + "3400921838329", + "3400937827997", + "3400938782462", + "3400936747272", + "3400921854817", + "3400938112887", + "3400949946419", + "3400949968879", + "3400949941155", + "3400938106107", + "3400941859014", + "3400927481741", + "3400939307688", + "3400921850215", + "3400949117154", + "3400949117093", + "3400949116263", + "3400941902109", + "3400941889349", + "3400941888168", + "3400936887411", + "3400936887350", + "3400936884168", + "3400936883918", + "3400936883857", + "3400936805446", + "3400936805217", + "3400936676961", + "3400921992304", + "3400949004799", + "3400949003440", + "3400941787522", + "3400941607981", + "3400941784569", + "3400921994773", + "3400949008643", + "3400936804784", + "3400941904868", + "3400938119343", + "3400930003060", + "3400938142693", + "3400938140163", + "3400938112139", + "3400938112078", + "3400938109986", + "3400938109696", + "3400936483514", + "3400941611834", + "3400941610073", + "3400941609244", + "3400941608292", + "3400949008124", + "3400949007523", + "3400938106336", + "3400949015917", + "3400936747562", + "3400935674289", + "3400936281820", + "3400935842374", + "3400935974419", + "3400936284432", + "3400936886759", + "3400949001330", + "3400949000517", + "3400939213156", + "3400939524689", + "3400936883796", + "3400936282940", + "3400939072784", + "3400922389875", + "3400938118452", + "3400936804036", + "3400936483682", + "3400938117103", + "3400949761005", + "3400949946358", + "3400949001354", + "3400949001347", + "3400941781209", + "3400941781087", + "3400941780837", + "3400938117790", + "3400922489223", + "3400927607493", + "3400926663872", + "3400949699858", + "3400949970889", + "3400936288744", + "3400934081217", + "3400934130106", + "3400934052750", + 
"3400934455186", + "3400934455247", + "3400926957278", + "3400927568022", + "3400927567711", + "3400927567650", + "3400922296005", + "3400934415555", + "3400934415494", + "3400930011638", + "3400921927948", + "3400921928198", + "3400921848083", + "3400921847543", + "3400926955786", + "3400926954086", + "3400935585011", + "3400930060735", + "3400930060704", + "3400922295923", + "3400922296463", + "3400922197609", + "3400921836776", + "3400921611212", + "3400921633672", + "3400933839352", + "3400934869259", + "3400934869020", + "3400933839291", + "3400933839871", + "3400930011515", + "3400930002926", + "3400930011508", + "3400930060193", + "3400926823405", + "3400926823283", + "3400934963841", + "3400934475757", + "3400934868429", + "3400922197548", + "3400922197319", + "3400922403526", + "3400922095431", + "3400930060179", + "3400930060698", + "3400930060247", + "3400921636345", + "3400921634914", + "3400935674401", + "3400936862746", + "3400949762644", + "3400949719181", + "3400926816889", + "3400926815530", + "3400926957568", + "3400927359583", + "3400927359415", + "3400922037820", + "3400922037530", + "3400922037479", + "3400939030272", + "3400933341213", + "3400933341152", + "3400936323094", + "3400935533951", + "3400930066409", + "3400930066386", + "3400930066379", + "3400935585479", + "3400935584939", + "3400935584588", + "3400935533432", + "3400935533203", + "3400935585301", + "3400922037240", + "3400922036069", + "3400922034287", + "3400922033808", + "3400935584298", + "3400934706639", + "3400934081095", + "3400921849844", + "3400921852516", + "3400921852165", + "3400935584700", + "3400935584410", + "3400936282360", + "3400937991339", + "3400930070604", + "3400930070598", + "3400930070574", + "3400934456077", + "3400936871731", + "3400936871670", + "3400936871441", + "3400934410413", + "3400934303562", + "3400934081446", + "3400921928259", + "3400921995954", + "3400921994315", + "3400921998627", + "3400922000459", + "3400921993653", + "3400921992823", + 
"3400949766376", + "3400936668119", + "3400936282889", + "3400936805965", + "3400922463438", + "3400926955908", + "3400939389653", + "3400938778502", + "3400938776379", + "3400949721542", + "3400949721481", + "3400938991598", + "3400938277869", + "3400927932694", + "3400936288683", + "3400949766666", + "3400949718061", + "3400949755370", + "3400949494514", + "3400949759804", + "3400949495573", + "3400927823374", + "3400949718412", + "3400949718351", + "3400949702091", + "3400949701612", + "3400949494743", + "3400949757152", + "3400949758791", + "3400949758562", + "3400949757381", + "3400949487189", + "3400949970421", + "3400949946297", + "3400949946129", + "3400927931925", + "3400927931116", + "3400927930683", + "3400927871399", + "3400927871221", + "3400927929854", + "3400935881694", + "3400939307749", + "3400939307510", + "3400939255439", + "3400939541778", + "3400939271286", + "3400939322018", + "3400949754021", + "3400938703979", + "3400938703740", + "3400939075228", + "3400939030623", + "3400939073095", + "3400939030562", + "3400938117561", + "3400938117271", + "3400938119633", + "3400938119572", + "3400938112948", + "3400937827188", + "3400938118681", + "3400939321646", + "3400936287105", + "3400936287044", + "3400934549021", + "3400934706868", + "3400934705519", + "3400934097935", + "3400939030333", + "3400934455766", + "3400938991710", + "3400934705229", + "3400941784279", + "3400939072555", + "3400949003099", + "3400941787751", + "3400934130045", + "3400927931284", + "3400927929915", + "3400927366796", + "3400926665654", + "3400927418358", + "3400939239699", + "3400939321936", + "3400937991278", + "3400949719532", + "3400949718870", + "3400930011669", + "3400927607264", + "3400935533371", + "3400949968411", + "3400936284661", + "3400938193503", + "3400938108866", + "3400934606533", + "3400938104844", + "3400922402406", + "3400949753949", + "3400949718122", + "3400949498536", + "3400936287273", + "3400949755202", + "3400949719471", + "3400926953775", + 
"3400949946068", + "3400949945986", + "3400949946877", + "3400949946709", + "3400949946648", + "3400949941445", + "3400922462776", + "3400922465210", + "3400926668327", + "3400926667894", + "3400926661281", + "3400926663414", + "3400939541198", + "3400939320816", + "3400939320755", + "3400939213385", + "3400939213217", + "3400939213095", + "3400939239989", + "3400949767786", + "3400939030104", + "3400939072326", + "3400938781052", + "3400938780802", + "3400938778380", + "3400938785364", + "3400949939893", + "3400938118513", + "3400938118162", + "3400938194333", + "3400937974998", + "3400938194104", + "3400938193732", + "3400939541549", + "3400938784015", + "3400938186529", + "3400922403816", + "3400926951306", + "3400935514790", + "3400936290525", + "3400936282421", + "3400936668287", + "3400934304163", + "3400927607615", + "3400934415784", + "3400934475696", + "3400922491233", + "3400922490281", + "3400927417818", + "3400939322186", + "3400937530941", + "3400941902048", + "3400949026272", + "3400949014156", + "3400926661632", + "3400922489452", + "3400938407525", + "3400921634792", + "3400941783159", + "3400930060223", + "3400949000838", + "3400930011539", + "3400934097126", + "3400922296173", + "3400949769278", + "3400949769049", + "3400949767908", + "3400949498246", + "3400949498017", + "3400949762415", + "3400949000494", + "3400927357572", + "3400922491691", + "3400922491462", + "3400922490403", + "3400927824326", + "3400927823435", + "3400949721771", + "3400939257679", + "3400939257440", + "3400939541488", + "3400939540948", + "3400939656953", + "3400939656724", + "3400949761463", + "3400938778151", + "3400938776140", + "3400938993660", + "3400935941879", + "3400937828369", + "3400937828598", + "3400927357343", + "3400927352898", + "3400926766870", + "3400926766351", + "3400927607325", + "3400927481512", + "3400939389882", + "3400937828079", + "3400939075167", + "3400939080710", + "3400941785511", + "3400922036298", + "3400927357404", + "3400936482852", + 
"3400927822544", + "3400926766580", + "3400922033976", + "3400926822163", + "3400926815998", + "3400927818004", + "3400927817922", + "3400927822483", + "3400936482913", + "3400949721023", + "3400949720880", + "3400949720651", + "3400949719242", + "3400949721313", + "3400949718702", + "3400927417986", + "3400938785074", + "3400949761173", + "3400949717811", + "3400949717699", + "3400949765256", + "3400949765027", + "3400927821714", + "3400927821653", + "3400927800481", + "3400927568190", + "3400949718580", + "3400939388472", + "3400939388243", + "3400936323216", + "3400934476068", + "3400938408416", + "3400939816074", + "3400949763535", + "3400949760053", + "3400937827829", + "3400937827768", + "3400938116731", + "3400938116502", + "3400938113600", + "3400927481451", + "3400927481390", + "3400927418129", + "3400927418068", + "3400927417757", + "3400927933295", + "3400927933127", + "3400939815763", + "3400938113259", + "3400938113020", + "3400939079998", + "3400921609370", + "3400938175172", + "3400949339648", + "3400936324336", + "3400921636574", + "3400921628999", + "3400949341948", + "3400941657320", + "3400941896613", + "3400936806627", + "3400921607949", + "3400941895371", + "3400921793390", + "3400927932175", + "3400936290235", + "3400921851335", + "3400936484115", + "3400927930973", + "3400936673830", + "3400921792560", + "3400936281240", + "3400934615528", + "3400936325975", + "3400949766727", + "3400949766437", + "3400936672949", + "3400949011773", + "3400949022311", + "3400949022250", + "3400949341887", + "3400949341719", + "3400949341368", + "3400949341078", + "3400941904400", + "3400949016686", + "3400949016457", + "3400949623228", + "3400941659041", + "3400941658969", + "3400941895661", + "3400941894312", + "3400941893131", + "3400941892998", + "3400941892820", + "3400949338986", + "3400949338818", + "3400941658730", + "3400941657498", + "3400949943456", + "3400949021598", + "3400949010073", + "3400921636284", + "3400949943395", + "3400930000755", + 
"3400927873690", + "3400949026562", + "3400949026333", + "3400949015047", + "3400949014965", + "3400949014675", + "3400921627701", + "3400921627589", + "3400949011605", + "3400949948079", + "3400921849035", + "3400921609660", + "3400927368110", + "3400934455827", + "3400935583987", + "3400927930805", + "3400939257389", + "3400939307220", + "3400938174342", + "3400921611502", + "3400949948130", + "3400926802462", + "3400936281189", + "3400926814410", + "3400935584069", + "3400936480551", + "3400949948369", + "3400949339709", + "3400939146348", + "3400949022021", + "3400939154503", + "3400939151250", + "3400936324626", + "3400922037301", + "3400921790559", + "3400939655482", + "3400936325227", + "3400927884467", + "3400949014736", + "3400949016518", + "3400949947997", + "3400938105155", + "3400938140224", + "3400922471761", + "3400938111828", + "3400936325807", + "3400936325395", + "3400936325166", + "3400937974769", + "3400938268416", + "3400936480612", + "3400936583610", + "3400936677043", + "3400936674080", + "3400936673540", + "3400938140453", + "3400936484283", + "3400936673199", + "3400936673021", + "3400936672710", + "3400936388611", + "3400936388550", + "3400936480490", + "3400936672659", + "3400936668577", + "3400938174922", + "3400938174052", + "3400936668409", + "3400936668348", + "3400939080888", + "3400939389592", + "3400933839932", + "3400939657325", + "3400939151779", + "3400939657264", + "3400939657035", + "3400939154732", + "3400939154442", + "3400939151601", + "3400936356153", + "3400939151489", + "3400939321707", + "3400939145228", + "3400939307398", + "3400938119282", + "3400938119114", + "3400939146287", + "3400939080130", + "3400938119053", + "3400939154213", + "3400939154091", + "3400934095924", + "3400935533661", + "3400935533142", + "3400934303913", + "3400935533081", + "3400936280939", + "3400936290983", + "3400936290006", + "3400936034372", + "3400927933417", + "3400922390826", + "3400921623796", + "3400921793222", + "3400921901405", + 
"3400936323674", + "3400921640137", + "3400921639995", + "3400921638707", + "3400921633443", + "3400921633382", + "3400921604818", + "3400926802004", + "3400926800802", + "3400926813529", + "3400926813178", + "3400921626698", + "3400921626469", + "3400926664992", + "3400926662813", + "3400927931406", + "3400927884238", + "3400927884009", + "3400921848953", + "3400921780093", + "3400922471532", + "3400921638646", + "3400921637694", + "3400949948420", + "3400927930515", + "3400927930393", + "3400927871689", + "3400921851274", + "3400921779493", + "3400921779325", + "3400921623567", + "3400921611380", + "3400921608199", + "3400936583498", + "3400936672888", + "3400922296234", + "3400927800832", + "3400921607888", + "3400921622447", + "3400921606010", + "3400922392196", + "3400922393438", + "3400922393087", + "3400921632552", + "3400922000510", + "3400922000398", + "3400921906837", + "3400936324565", + "3400921632323", + "3400921630251", + "3400921629941", + "3400927932236", + "3400927920639", + "3400930002896", + "3400930000724", + "3400930003039", + "3400922470702", + "3400930015384", + "3400934475528", + "3400935533722", + "3400938268584", + "3400938103953", + "3400936673311", + "3400941610424", + "3400941788413", + "3400949117963", + "3400936483743", + "3400936673601", + "3400938785654", + "3400921928020", + "3400922036359", + "3400936291645", + "3400949004621", + "3400935514561", + "3400930027288", + "3400949946587", + "3400949116324", + "3400941888458", + "3400936887299", + "3400936805385", + "3400949337347", + "3400941785689", + "3400938105445", + "3400936805736", + "3400941609824", + "3400921623338", + "3400949336517", + "3400939524450", + "3400936290815", + "3400936287853", + "3400938108576", + "3400922033747", + "3400949763993", + "3400949721191", + "3400939255729", + "3400939239521", + "3400936290464", + "3400926951535", + "3400949700202", + "3400949755141", + "3400938673081", + "3400939072845", + "3400949007691", + "3400936804845", + "3400938954340", + 
"3400936862685", + "3400936281530", + "3400936286214", + "3400941906299", + "3400949118793", + "3400926822453", + "3400936674370", + "3400937991568", + "3400934707230", + "3400934096754", + "3400934606182", + "3400938118223", + "3400937531191", + "3400934052460", + "3400939320694", + "3400939239811", + "3400939656892", + "3400921928310", + "3400926765750", + "3400949720712", + "3400949719013", + "3400936323155", + "3400939815824", + "3400938139853", + "3400938109757", + "3400941888229", + "3400927824494", + "3400949001323", + "3400949001286", + "3400936674202", + "3400936357624", + "3400936357334", + "3400936357273", + "3400939074276", + "3400930066423", + "3400941859533", + "3400926814700", + "3400937827539", + "3400949498994", + "3400949721832", + "3400938782004", + "3400922037769", + "3400935533890", + "3400935584878", + "3400938784305", + "3400922491752", + "3400927846847", + "3400922034348", + "3400930070567", + "3400936871380", + "3400927932526", + "3400922197487", + "3400949946938", + "3400927931864", + "3400927870910", + "3400934410642", + "3400949940264", + "3400938117912", + "3400938186758", + "3400949001644", + "3400921854466", + "3400927870859", + "3400949768967", + "3400949495863", + "3400938993899", + "3400937828130", + "3400922035987", + "3400934096006", + "3400927817861", + "3400927821592", + "3400926955847", + "3400930060728", + "3400936886810", + "3400927352959", + "3400926954376", + "3400949717750", + "3400949763764", + "3400938117042", + "3400927607554", + "3400934303333", + "3400921995664", + "3400921994025", + "3400938113549", + "3400934475818", + "3400934098017", + "3400927568251", + "3400922197777", + "3400934963261", + "3400922146850", + "3400926817190", + "3400927366338", + "3400927481680", + "3400927418419", + "3400938782172", + "3400938105094", + "3400949758333", + "3400949947010", + "3400938119862", + "3400937827300", + "3400921837896", + "3400921836547", + "3400930060216", + "3400922465678", + "3400926666026", + "3400927801082", + 
"3400930027301", + "3400935584120", + "3400930060711", + "3400934548420", + "3400921605877", + "3400949009763", + "3400938991420", + "3400922470870", + "3400949026104", + "3400949021949", + "3400939152080", + "3400941906060", + "3400949022199", + "3400939655543", + "3400934417566", + "3400941896842", + "3400938113488", + "3400949341139", + "3400941895203", + "3400937974820", + "3400935973467", + "3400934601910", + "3400921628760", + "3400927932984", + "3400939307459", + "3400921604757", + "3400941904578", + "3400938174113", + "3400936677104", + "3400936673489", + "3400936325746", + "3400936484344", + "3400936668638", + "3400941894190", + "3400936668058", + "3400936290693", + "3400936290174", + "3400939146577", + "3400936803954", + "3400949943517", + "3400939144917", + "3400939079820", + "3400938194043", + "3400935532831", + "3400936388499", + "3400927933585", + "3400934417337", + "3400936674141", + "3400922389585", + "3400927368578", + "3400927800542", + "3400927801143", + "3400921792621", + "3400936356443", + "3400936356214", + "3400936323964", + "3400936323735", + "3400936583559", + "3400921779905", + "3400927932816", + "3400921622218", + "3400921638936", + "3400921605068", + "3400926801113", + "3400934096983", + "3400927931574", + "3400927883927", + "3400921637816", + "3400927930225", + "3400922393148", + "3400921630022", + "3400922392028", + "3400922391137", + "3400921627411" ) - val ipp = new PharmacologicalClassConfig( name = "IPP", ATCCodes = List("A02BC*") ) override val pharmacologicalClasses: List[PharmacologicalClassConfig] = List(ipp) + } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadBasicExtractor.scala new file mode 100644 index 00000000..a7db967e --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadBasicExtractor.scala @@ -0,0 +1,15 @@ +// License: BSD 3 clause + +package 
fr.polytechnique.cmap.cnam.etl.extractors.had + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.events.AnyEvent +import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait HadBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with HadRowExtractor { + override def getInput(sources: Sources): DataFrame = sources.had.get.estimateStayStartTime + .select(neededColumns.map(col): _*) + +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadExtractor.scala deleted file mode 100644 index cd25b8f2..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadExtractor.scala +++ /dev/null @@ -1,47 +0,0 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.had - -import java.sql.Timestamp - -import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} -import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} - -trait HadExtractor[EventType <: AnyEvent] extends Extractor[EventType] with HadSource with EventRowExtractor { - - val columnName: String - - val eventBuilder: EventBuilder - - def getInput(sources: Sources): DataFrame = sources.had.get.select(ColNames.all.map(col): _*).estimateStayStartTime - - def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(code(row).startsWith(_)) - - def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) - - def builder(row: Row): Seq[Event[EventType]] = { - lazy val patientId = extractPatientId(row) - lazy val groupId = extractGroupId(row) - lazy val eventDate = extractStart(row) - lazy val endDate = extractEnd(row) - lazy val weight 
= extractWeight(row) - - Seq(eventBuilder[EventType](patientId, groupId, code(row), weight, eventDate, endDate)) - } - - def code: Row => String = (row: Row) => row.getAs[Int](columnName).toString - - def extractPatientId(r: Row): String = { - r.getAs[String](ColNames.PatientID) - } - - override def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.EtaNumEpmsi) + "_" + - r.getAs[String](ColNames.RhadNum) + "_" + - r.getAs[Int](NewColumns.Year).toString - } - - def extractStart(r: Row): Timestamp = r.getAs[Timestamp](NewColumns.EstimatedStayStart) -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadRowExtractor.scala new file mode 100644 index 00000000..0238c691 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadRowExtractor.scala @@ -0,0 +1,25 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.had + +import java.sql.Timestamp +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor + +trait HadRowExtractor extends HadSource with EventRowExtractor { + + override def usedColumns: List[String] = List( + ColNames.PatientID, ColNames.EtaNumEpmsi, ColNames.RhadNum, + NewColumns.Year, NewColumns.EstimatedStayStart, ColNames.StayStartDate + ) ++ super.usedColumns + + def extractPatientId(r: Row): String = { + r.getAs[String](ColNames.PatientID) + } + + override def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.EtaNumEpmsi) + "_" + + r.getAs[String](ColNames.RhadNum) + "_" + + r.getAs[Int](NewColumns.Year).toString + } + + def extractStart(r: Row): Timestamp = r.getAs[Timestamp](NewColumns.EstimatedStayStart) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSource.scala index f0d98608..ab4163a9 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSource.scala @@ -1,10 +1,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.had -import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames -import fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp +import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.TimestampType -import org.apache.spark.sql.{Column, DataFrame} +import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames trait HadSource extends ColumnNames { @@ -24,12 +23,8 @@ trait HadSource extends ColumnNames { val StayEndDate: ColName = "SOR_DAT" val StartDate: ColName = "EXE_SOI_DTD" val EndDate: ColName = "EXE_SOI_DTF" - val all = List( - PatientID, DP, DA, CCAM, PEC_PAL, PEC_ASS, EtaNumEpmsi, RhadNum, - StayStartDate, StayEndDate, StartDate, EndDate - ) - val hospitalStayPart = List( - PatientID, EtaNumEpmsi, RhadNum, StartDate, StayStartDate, StayEndDate, EndDate + val core: List[ColName] = List( + PatientID, EtaNumEpmsi, RhadNum, StayStartDate, StayEndDate, StartDate, EndDate ) } @@ -53,9 +48,11 @@ trait HadSource extends ColumnNames { val givenYear: Column = year(givenDate) df.withColumn( - NewColumns.EstimatedStayStart, givenDate) + NewColumns.EstimatedStayStart, givenDate + ) .withColumn( - NewColumns.Year, givenYear) + NewColumns.Year, givenYear + ) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStaysExtractor.scala index c57f2d85..46eb0805 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStaysExtractor.scala @@ -1,30 +1,19 @@ package 
fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays import java.sql.{Date, Timestamp} +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HadHospitalStay, HospitalStay} +import fr.polytechnique.cmap.cnam.etl.extractors.{AlwaysTrueStrategy, BaseExtractorCodes} +import fr.polytechnique.cmap.cnam.etl.extractors.had.HadBasicExtractor -import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, HadHospitalStay} -import fr.polytechnique.cmap.cnam.etl.extractors.had.HadExtractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} - -object HadHospitalStaysExtractor extends HadExtractor[HospitalStay] { +object HadHospitalStaysExtractor extends HadBasicExtractor[HospitalStay] + with AlwaysTrueStrategy[HospitalStay] { override val columnName: String = ColNames.EndDate override val eventBuilder: EventBuilder = HadHospitalStay - override def extractEnd(r: Row): Option[Timestamp] = Some(new Timestamp(r.getAs[Date](ColNames.EndDate).getTime)) - - override def extractStart(r: Row): Timestamp = new Timestamp(r.getAs[Date](ColNames.StartDate).getTime) + override def extractValue(row: Row): String = extractGroupId(row) - override def isInStudy(codes: Set[String])(row: Row): Boolean = true - - override def code: Row => String = extractGroupId - - override def getInput(sources: Sources): DataFrame = sources.had.get.select(ColNames.hospitalStayPart.map(col): _*).estimateStayStartTime + override def extractEnd(r: Row): Option[Timestamp] = Some(new Timestamp(r.getAs[Date](ColNames.EndDate).getTime)) - override def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.EtaNumEpmsi) + "_" + - r.getAs[String](ColNames.RhadNum) + "_" + - r.getAs[Int](NewColumns.Year).toString - } + override def getCodes: BaseExtractorCodes = BaseExtractorCodes.empty } diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala index 776c88f2..c63058ba 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala @@ -2,27 +2,30 @@ package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays import java.sql.{Date, Timestamp} import scala.util.Try -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, McoHospitalStay} -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoExtractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.etl.extractors.{AlwaysTrueStrategy, BaseExtractorCodes} +import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoBasicExtractor -object McoHospitalStaysExtractor extends McoExtractor[HospitalStay] { - override val columnName: String = ColNames.EndDate - override val eventBuilder: EventBuilder = McoHospitalStay +object McoHospitalStaysExtractor extends McoBasicExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { + + override def getCodes: BaseExtractorCodes = BaseExtractorCodes.empty + + override def columnName: String = ColNames.EndDate + + override def eventBuilder: EventBuilder = McoHospitalStay + + override def neededColumns: List[String] = List(ColNames.StayFrom, ColNames.StayFromType) ++ super.usedColumns override def extractEnd(r: Row): Option[Timestamp] = Some(new Timestamp(r.getAs[Date](ColNames.EndDate).getTime)) override def extractStart(r: Row): Timestamp = new Timestamp(r.getAs[Date](ColNames.StartDate).getTime) - override def isInStudy(codes: Set[String])(row: Row): Boolean = true - - 
override def code: Row => String = extractGroupId + override def extractValue(row: Row): String = extractGroupId(row) override def extractWeight(r: Row): Double = { - getFromValue(r) flatMap (from => getFromType(r) map (fromType => from + fromType * 0.1)) recover { case _ => -1D } get - } + getFromValue(r).flatMap(from => getFromType(r).map(fromType => from + fromType * 0.1)) recover { case _ => -1D } + }.get private def getFromValue(r: Row): Try[Double] = { Try { @@ -42,6 +45,4 @@ object McoHospitalStaysExtractor extends McoExtractor[HospitalStay] { } } } - - override def getInput(sources: Sources): DataFrame = sources.mco.get.select(ColNames.hospitalStayPart.map(col): _*) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala index 4a1164f4..5ded697b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala @@ -1,74 +1,30 @@ package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays import java.sql.{Date, Timestamp} -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} -import fr.polytechnique.cmap.cnam.etl.events.{Event, HospitalStay, McoceEmergency} -import fr.polytechnique.cmap.cnam.etl.extractors.Extractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, McoceEmergency} +import fr.polytechnique.cmap.cnam.etl.extractors.{AlwaysTrueStrategy, BaseExtractorCodes} +import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeBasicExtractor -object McoceEmergenciesExtractor extends Extractor[HospitalStay] with McoceEmergenciesExtractor { - /** Allows to check if the Row from the Source 
is considered in the current Study. - * - * @param codes A set of codes being considered in the Study. - * @param row The row itself. - * @return A boolean value. - */ - override def isInStudy(codes: Set[String])(row: Row): Boolean = true +object McoceEmergenciesExtractor extends McoCeBasicExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { /** Checks if the passed Row has the information needed to build the Event. - * - * @param row The row itself. - * @return A boolean value. - */ - override def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(ColNames.ActCode)) && row.getAs[String](ColNames.ActCode).startsWith("ATU") + * + * @param row The row itself. + * @return A boolean value. + */ + override def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(ColNames.ActCode)) && row + .getAs[String](ColNames.ActCode).startsWith("ATU") - /** Builds the Event. - * - * @param row The row itself. - * @return An event object. - */ - override def builder(row: Row): Seq[Event[HospitalStay]] = { - val patientID = extractPatientId(row) - val groupId = extractGroupId(row) - val start = extractStart(row) - val end = extractEnd(row) - Seq(McoceEmergency(patientID, groupId, start, end)) - } + override def extractEnd(r: Row): Option[Timestamp] = Some(new Timestamp(r.getAs[Date](ColNames.EndDate).getTime)) - /** Gets and prepares all the needed columns from the Source. - * - * @param sources Source object [[Sources]] that contains all sources. - * @return A dataframe with mco columns. 
- */ - override def getInput(sources: Sources): DataFrame = sources.mcoCe.get.select(ColNames.all.map(col): _*) -} - -trait McoceEmergenciesExtractor { - - final object ColNames extends Serializable { - final val PatientID: String = "NUM_ENQ" - final val EtaNum: String = "ETA_NUM" - final val SeqNum: String = "SEQ_NUM" - final val StartDate: String = "EXE_SOI_DTD" - final val EndDate: String = "EXE_SOI_DTF" - final val Year: String = "MCO_FBSTC__SOR_ANN" - final val ActCode: String = "MCO_FBSTC__ACT_COD" - final val all: List[String] = List(PatientID, EtaNum, SeqNum, Year, StartDate, EndDate, ActCode) - } - - def extractPatientId(r: Row): String = { - r.getAs[String](ColNames.PatientID) - } + override def extractValue(row: Row): String = extractGroupId(row) - def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.EtaNum) + "_" + - r.getAs[String](ColNames.SeqNum) + "_" + - r.getAs[Int](ColNames.Year).toString - } + override def columnName: String = ColNames.ActCode - def extractEnd(r: Row): Timestamp = new Timestamp(r.getAs[Date](ColNames.EndDate).getTime) + override def eventBuilder: EventBuilder = McoceEmergency - def extractStart(r: Row): Timestamp = new Timestamp(r.getAs[Date](ColNames.StartDate).getTime) + override def usedColumns: List[String] = List(ColNames.EndDate) ++ super.usedColumns + override def getCodes: BaseExtractorCodes = BaseExtractorCodes.empty } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SsrHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SsrHospitalStaysExtractor.scala index e8b383f0..42a67fe8 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SsrHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SsrHospitalStaysExtractor.scala @@ -1,14 +1,12 @@ package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays import java.sql.{Date, Timestamp} - +import 
org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, SsrHospitalStay} -import fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrExtractor -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} +import fr.polytechnique.cmap.cnam.etl.extractors.{AlwaysTrueStrategy, BaseExtractorCodes} +import fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrBasicExtractor -object SsrHospitalStaysExtractor extends SsrExtractor[HospitalStay] { +object SsrHospitalStaysExtractor extends SsrBasicExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { override val columnName: String = ColNames.EndDate override val eventBuilder: EventBuilder = SsrHospitalStay @@ -16,15 +14,13 @@ object SsrHospitalStaysExtractor extends SsrExtractor[HospitalStay] { override def extractStart(r: Row): Timestamp = new Timestamp(r.getAs[Date](ColNames.StartDate).getTime) - override def isInStudy(codes: Set[String])(row: Row): Boolean = true - - override def code: Row => String = extractGroupId - - override def getInput(sources: Sources): DataFrame = sources.ssr.get.select(ColNames.hospitalStayPart.map(col): _*) + override def extractValue(row: Row): String = extractGroupId(row) override def extractGroupId(r: Row): String = { r.getAs[String](ColNames.EtaNum) + "_" + r.getAs[String](ColNames.RhaNum) + "_" + r.getAs[Int](ColNames.Year).toString } + + override def getCodes: BaseExtractorCodes = BaseExtractorCodes.empty } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbBasicExtractor.scala new file mode 100644 index 00000000..d068cad5 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbBasicExtractor.scala @@ -0,0 +1,15 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.imb + +import 
org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.events.AnyEvent +import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait ImbBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with ImbRowExtractor { + def getInput(sources: Sources): DataFrame = sources.irImb.get.select(neededColumns.map(col): _*) + + override def usedColumns: List[String] = super.usedColumns +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbRowExtractor.scala new file mode 100644 index 00000000..547bf771 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbRowExtractor.scala @@ -0,0 +1,60 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.imb + +import java.sql.{Date, Timestamp} +import scala.util.Try +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor +import fr.polytechnique.cmap.cnam.util.datetime +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +/** IR_IMB_R contains the Chronic Diseases diagnoses (ALD = Affection Longue Duree) for patients once + * they have been exonerated for all cares related to this Chronic Disease. + * It is the medical service of the health insurance that grants this ALD on the proposal of the + * patient's main physician (Medecin Traitant). 
+ * See the [online snds documentation for further details](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#le-dispositif-des-ald) + * + */ +trait ImbRowExtractor extends ImbSource with EventRowExtractor { + + def extractCode(row: Row): String = row.getAs[String](ColNames.Code) + + def extractEncoding(row: Row): String = row.getAs[String](ColNames.Encoding) + + override def extractPatientId(row: Row): String = row.getAs[String](ColNames.PatientID) + + override def extractStart(row: Row): Timestamp = { + import datetime.implicits._ + + row.getAs[Date](ColNames.Date).toTimestamp + } + + /** + * The End date of the ALD is not always written. It can take the value 1600-01-01 which + * corresponds to a None value (not set) that we convert to None. + * See the CNAM documentation [available here](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#annexe) + * + * @param r + * @return + */ + override def extractEnd(r: Row): Option[Timestamp] = { + import datetime.implicits._ + Try( + { + val rawEndDate = r.getAs[java.util.Date](ColNames.EndDate).toTimestamp + + if (makeTS(1700, 1, 1).after(rawEndDate)) { + None + } + else { + Some(rawEndDate) + } + } + ) recover { + case _: NullPointerException => None + } + }.get +} + + diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbSource.scala new file mode 100644 index 00000000..0939dcc4 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbSource.scala @@ -0,0 +1,15 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.imb + +import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames + +trait ImbSource extends ColumnNames{ + final object ColNames extends Serializable { + final lazy val PatientID = "NUM_ENQ" + final lazy val Encoding = "MED_NCL_IDT" + final lazy val Code = "MED_MTF_COD" + final lazy val Date =
"IMB_ALD_DTD" + final lazy val EndDate = "IMB_ALD_DTF" + } +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoBasicExtractor.scala new file mode 100644 index 00000000..e11dbac3 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoBasicExtractor.scala @@ -0,0 +1,14 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.mco + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.events.AnyEvent +import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait McoBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with McoRowExtractor { + def getInput(sources: Sources): DataFrame = + sources.mco.get.select(neededColumns.map(col): _*).estimateStayStartTime +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala deleted file mode 100644 index 8c6f7ca0..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoExtractor.scala +++ /dev/null @@ -1,77 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.mco - -import java.sql.Timestamp -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} -import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} -import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} -import fr.polytechnique.cmap.cnam.etl.sources.Sources - -trait McoExtractor[EventType <: AnyEvent] extends Extractor[EventType] with McoSource with EventRowExtractor { - - val columnName: String - - val eventBuilder: EventBuilder - - def getInput(sources: Sources): DataFrame = 
sources.mco.get.select(ColNames.all.map(col): _*).estimateStayStartTime - - def isInStudy(codes: Set[String])(row: Row): Boolean = { - codes.exists(code(row).startsWith(_)) - } - - def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) - - def builder(row: Row): Seq[Event[EventType]] = { - lazy val patientId = extractPatientId(row) - lazy val groupId = extractGroupId(row) - lazy val eventDate = extractStart(row) - lazy val endDate = extractEnd(row) - lazy val weight = extractWeight(row) - - Seq(eventBuilder[EventType](patientId, groupId, code(row), weight, eventDate, endDate)) - } - - /** Extracts the tracked value. - * - * @return A string value. - */ - def code = (row: Row) => row.getAs[String](columnName) - - /** It gets PatientID value from row. - * - * @param r The row itself. - * @return The value of PatientID. - */ - def extractPatientId(r: Row): String = { - r.getAs[String](ColNames.PatientID) - } - - /** Creates an ID that group Events of different categories - * by concatinating ETA_NUM, RSA_NUM and the YEAR. - * - * @param r The row itself. - * @return The value of groupId. - */ - override def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.EtaNum) + "_" + - r.getAs[String](ColNames.RsaNum) + "_" + - r.getAs[Int](ColNames.Year).toString - } - - /** Extracts the EstimatedStayStart as the start. - * It comes from the method [[McoDataFrame.estimateStayStartTime]]. - * - * @param r The row itself. - * @return The value of EstimatedStayStart. - */ - def extractStart(r: Row): Timestamp = r.getAs[Timestamp](NewColumns.EstimatedStayStart) - - /** It gets ExitMode from row. - * - * @param r The row itself. - * @return The value of ExitMode. 
- */ - def getExit(r: Row): String = r.getAs[String](ColNames.ExitMode) -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoRowExtractor.scala new file mode 100644 index 00000000..6d40845c --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoRowExtractor.scala @@ -0,0 +1,42 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.mco + +import java.sql.Timestamp +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor + +trait McoRowExtractor extends McoSource with EventRowExtractor { + + override def usedColumns: List[String] = ColNames.core ++ super.usedColumns + + /** It gets PatientID value from MCO source. + * + * @param r The row itself. + * @return The value of PatientID. + */ + def extractPatientId(r: Row): String = { + r.getAs[String](ColNames.PatientID) + } + + /** Creates an ID that groups Events of different categories + * by concatenating ETA_NUM, RSA_NUM and the YEAR. + * + * @param r The row itself. + * @return The value of groupId. + */ + override def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.EtaNum) + "_" + + r.getAs[String](ColNames.RsaNum) + "_" + + r.getAs[Int](ColNames.Year).toString + } + + /** Extracts the EstimatedStayStart as the start. + * It comes from the method [[McoDataFrame.estimateStayStartTime]]. + * + * @param r The row itself. + * @return The value of EstimatedStayStart. 
+ */ + def extractStart(r: Row): Timestamp = r.getAs[Timestamp](NewColumns.EstimatedStayStart) +} + diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala index 8cd103d0..ed3ffa66 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala @@ -2,9 +2,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.mco +import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{LongType, TimestampType} -import org.apache.spark.sql.{Column, DataFrame} import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames import fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp @@ -31,6 +31,11 @@ trait McoSource extends ColumnNames { val CCAMDelayDate: ColName = "MCO_A__ENT_DAT_DEL" val StayFrom: ColName = "MCO_B__ENT_MOD" val StayFromType: ColName = "MCO_B__ENT_PRV" + + val core = List( + PatientID, EtaNum, RsaNum, Year, StayEndMonth, StayEndYear, StayLength, + StayStartDate, StayEndDate, StartDate, EndDate + ) val all = List( PatientID, DP, DR, DA, CCAM, GHM, EtaNum, RsaNum, Year, ExitMode, StayEndMonth, StayEndYear, StayLength, StayStartDate, StayEndDate, StartDate, EndDate, CCAMDelayDate diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeBasicExtractor.scala new file mode 100644 index 00000000..8fd4da6d --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeBasicExtractor.scala @@ -0,0 +1,11 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.events.AnyEvent +import 
fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait McoCeBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with McoCeRowExtractor { + def getInput(sources: Sources): DataFrame = sources.mcoCe.get.select(neededColumns.map(col): _*) +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala deleted file mode 100644 index 03e53530..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeExtractor.scala +++ /dev/null @@ -1,51 +0,0 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe - -import java.sql.Timestamp -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} -import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} -import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.datetime.implicits._ - - -trait McoCeExtractor[EventType <: AnyEvent] extends Extractor[EventType] with McoCeSource with EventRowExtractor { - - val columnName: String - - val eventBuilder: EventBuilder - - def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(code(row).startsWith(_)) - - def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) - - def builder(row: Row): Seq[Event[EventType]] = { - lazy val patientId = extractPatientId(row) - lazy val groupId = extractGroupId(row) - lazy val eventDate = extractStart(row) - lazy val endDate = extractEnd(row) - lazy val weight = extractWeight(row) - - Seq(eventBuilder[EventType](patientId, groupId, code(row), weight, eventDate, endDate)) - } - - def code = (row: Row) => row.getAs[String](columnName) - - def extractPatientId(r: Row): String = { - 
r.getAs[String](ColNames.PatientID) - } - - /** Return groupID as hospital stay ID - * - * @param r - * @return groupId which is the unique ID of the hospital stay - */ - override def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.EtaNum) + "_" + - r.getAs[String](ColNames.SeqNum) + "_" + - r.getAs[Int](ColNames.Year).toString - } - - def extractStart(r: Row): Timestamp = r.getAs[Timestamp](ColNames.Date).toTimestamp -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeRowExtractor.scala new file mode 100644 index 00000000..489c2bb6 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeRowExtractor.scala @@ -0,0 +1,34 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe + +import java.sql.Timestamp +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor +import fr.polytechnique.cmap.cnam.util.datetime.implicits._ + +trait McoCeRowExtractor extends McoCeSource with EventRowExtractor { + override def usedColumns: List[String] = super.usedColumns ++ List( + ColNames.PatientID, ColNames.EtaNum, + ColNames.SeqNum, ColNames.Year, + ColNames.StartDate + ) + + def extractPatientId(r: Row): String = { + r.getAs[String](ColNames.PatientID) + } + + /** Return groupID as hospital stay ID + * + * @param r + * @return groupId which is the unique ID of the hospital stay + */ + override def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.EtaNum) + "_" + + r.getAs[String](ColNames.SeqNum) + "_" + + r.getAs[Int](ColNames.Year).toString + } + + def extractStart(r: Row): Timestamp = r.getAs[Timestamp](ColNames.StartDate).toTimestamp + +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala index 
4bb1e0b3..6c53b728 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala @@ -1,10 +1,6 @@ package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{LongType, TimestampType} -import org.apache.spark.sql.{Column, DataFrame} import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames -import fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp trait McoCeSource extends ColumnNames { @@ -12,11 +8,7 @@ trait McoCeSource extends ColumnNames { // Essential for all the Extractors val PatientID: ColName = "NUM_ENQ" val EtaNum: ColName = "ETA_NUM" - val SeqNum : ColName = "SEQ_NUM" - val Date = "EXE_SOI_DTD" - val Year = "year" - - // For the Act extractor + val SeqNum: ColName = "SEQ_NUM" val CamCode = "MCO_FMSTC__CCAM_COD" // NGAP from FBSTC @@ -33,7 +25,20 @@ trait McoCeSource extends ColumnNames { // Practionner from FCSTC val PractitionnerSpecialtyFcstc = "MCO_FCSTC__EXE_SPE" - val core = List(PatientID, EtaNum, SeqNum, Date, Year) + val StartDate: String = "EXE_SOI_DTD" + val EndDate: String = "EXE_SOI_DTF" + val ActCode: String = "MCO_FBSTC__ACT_COD" + + val core = List( + PatientID, EtaNum, SeqNum, Year, StartDate + ) + + val all = List( + PatientID, EtaNum, SeqNum, Year, CamCode, StartDate, + NgapKeyLetterFbstc, NgapCoefficientFbstc, PractitionnerSpecialtyFbstc, + NgapKeyLetterFcstc, NgapCoefficientFcstc, PractitionnerSpecialtyFcstc + ) + } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchases.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchases.scala index 35a3e332..a91f3e16 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchases.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchases.scala @@ -3,24 +3,22 @@ package fr.polytechnique.cmap.cnam.etl.extractors.molecules import java.sql.Timestamp +import org.apache.spark.sql.{Column, DataFrame, Row} import org.apache.spark.sql.expressions.Window -import org.apache.spark.sql.functions._ +import org.apache.spark.sql.functions.{col, sum, udf, when} import org.apache.spark.sql.types.{DoubleType, StringType, TimestampType} -import org.apache.spark.sql.{Column, DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{Event, Molecule} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.DrugEventsTransformerHelper -class DcirMoleculePurchases(config: MoleculePurchasesConfig) extends Extractor[Molecule] { +class DcirMoleculePurchases(config: MoleculePurchasesConfig) extends Extractor[Molecule, MoleculePurchasesConfig] { - override def isInStudy(codes: Set[String])(row: Row): Boolean = - codes.contains(row.getAs[String](Columns.Category)) + override def isInStudy(row: Row): Boolean = config.drugClasses.contains(row.getAs[String](Columns.Category)) override def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(Columns.EventDate)) && row.getAs[Int](Columns.NBoxes) > 0 - override def builder(row: Row): Seq[Event[Molecule]] = Seq( Molecule( getPatientID(row), @@ -30,6 +28,14 @@ class DcirMoleculePurchases(config: MoleculePurchasesConfig) extends Extractor[M ) ) + def getPatientID(row: Row): String = row.getAs[String](Columns.PatientID) + + def getValue(row: Row): String = row.getAs[String](Columns.MoleculeName) + + def getWeight(row: Row): Double = row.getAs[Double](Columns.TotalDose) + + def getEventDate(row: Row): Timestamp = row.getAs[Timestamp](Columns.EventDate) + override def getInput(sources: Sources): DataFrame = { val dcirInputColumns: List[Column] = List( 
col("NUM_ENQ").cast(StringType).as("patientID"), @@ -52,7 +58,8 @@ class DcirMoleculePurchases(config: MoleculePurchasesConfig) extends Extractor[M col("TOTAL_MG_PER_UNIT").cast(DoubleType).as("dosage") ) - val groupCols: List[Column] = List(col("patientID"), + val groupCols: List[Column] = List( + col("patientID"), col("moleculeName"), col("eventDate") ) @@ -66,9 +73,10 @@ class DcirMoleculePurchases(config: MoleculePurchasesConfig) extends Extractor[M val df = sources.dcir.get .select(dcirInputColumns: _*) - .withColumn(Columns.NBoxes, when(col(Columns.NBoxes) < 0, 0) - .when(col(Columns.NBoxes) > config.maxBoxQuantity, 0) - .otherwise(col(Columns.NBoxes)) + .withColumn( + Columns.NBoxes, when(col(Columns.NBoxes) < 0, 0) + .when(col(Columns.NBoxes) > config.maxBoxQuantity, 0) + .otherwise(col(Columns.NBoxes)) ) // get CIP07 drug val joinedByCIP07 = df @@ -88,14 +96,6 @@ class DcirMoleculePurchases(config: MoleculePurchasesConfig) extends Extractor[M .withColumn(Columns.TotalDose, sum(col(Columns.Dosage) * col(Columns.NBoxes)) over win) // Compute total dose } - def getPatientID(row: Row): String = row.getAs[String](Columns.PatientID) - - def getValue(row: Row): String = row.getAs[String](Columns.MoleculeName) - - def getWeight(row: Row): Double = row.getAs[Double](Columns.TotalDose) - - def getEventDate(row: Row): Timestamp = row.getAs[Timestamp](Columns.EventDate) - final object Columns extends Serializable { val PatientID = "patientID" val CIP07 = "CIP07" @@ -108,5 +108,6 @@ class DcirMoleculePurchases(config: MoleculePurchasesConfig) extends Extractor[M val TotalDose = "totalDose" } + override def getCodes: MoleculePurchasesConfig = config } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchases.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchases.scala index eb2ca0fe..fa6284c9 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchases.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchases.scala @@ -9,6 +9,6 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources class MoleculePurchases(config: MoleculePurchasesConfig) { def extract(sources: Sources): Dataset[Event[Molecule]] = { - new DcirMoleculePurchases(config).extract(sources, config.drugClasses.toSet) + new DcirMoleculePurchases(config).extract(sources) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesConfig.scala index 3a065cef..aac8a322 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesConfig.scala @@ -2,7 +2,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.molecules -import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig +import fr.polytechnique.cmap.cnam.etl.extractors.{ExtractorCodes, ExtractorConfig} /** * Base definition of the config needed by the MoleculePurchases extractor. 
@@ -12,7 +12,9 @@ import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig */ class MoleculePurchasesConfig( val drugClasses: List[String], - val maxBoxQuantity: Int) extends ExtractorConfig with Serializable + val maxBoxQuantity: Int) extends ExtractorConfig with ExtractorCodes { + override def isEmpty: Boolean = drugClasses.isEmpty +} object MoleculePurchasesConfig { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala index 133d140b..8e465de2 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala @@ -1,19 +1,67 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts -import scala.reflect.runtime.universe._ + +import org.apache.spark.sql.{Column, DataFrame, Row} import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} import fr.polytechnique.cmap.cnam.etl.events.{DcirNgapAct, Event, EventBuilder, NgapAct} -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.Extractor +import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirRowExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[NgapAct] { +final case class DcirNgapActExtractor(ngapActsConfig: NgapActConfig[NgapWithNatClassConfig]) + extends Extractor[NgapAct, NgapActConfig[NgapWithNatClassConfig]] with DcirRowExtractor { + + val columnName: String = ColNames.NaturePrestation + val eventBuilder: EventBuilder = DcirNgapAct + val ngapKeyLetterCol: String = "PRS_NAT_CB2" + + final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) + + override def getInput(sources: Sources): 
DataFrame = { + + val neededColumns: List[Column] = List( + ColNames.PatientID, ColNames.NaturePrestation, ColNames.NgapCoefficient, + ColNames.DcirEventStart, ColNames.ExecPSNum, ColNames.DcirFluxDate, ngapKeyLetterCol, + ColNames.Sector, ColNames.GHSCode, ColNames.InstitutionCode + ).map(col) - private final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) + lazy val irNat = sources.irNat.get + lazy val dcir = sources.dcir.get - override val columnName: String = ColNames.NaturePrestation - override val eventBuilder: EventBuilder = DcirNgapAct - val ngapKeyLetter: String = "PRS_NAT_CB2" + lazy val df: DataFrame = dcir.join(irNat, dcir(ColNames.NaturePrestation).cast("String") === irNat("PRS_NAT")) + df.select(neededColumns: _*) + } + + override def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(ngapKeyLetterCol)) + + override def isInStudy(row: Row): Boolean = { + + lazy val prsNatRef = row.getAs[Int](ColNames.NaturePrestation).toString + lazy val ngapKeyLetter = row.getAs[String](ngapKeyLetterCol) + lazy val ngapCoefficient = row.getAs[Double](ColNames.NgapCoefficient).toString + + ngapActsConfig.actsCategories + .exists( + category => { + category.ngapPrsNatRefs.contains(prsNatRef) || { + category.ngapKeyLetters.contains(ngapKeyLetter) && category.ngapCoefficients.contains(ngapCoefficient) + } + } + ) + } + + def builder(row: Row): Seq[Event[NgapAct]] = { + val patientId = extractPatientId(row) + val groupId = extractGroupId(row) + val value = extractValue(row) + val eventDate = extractStart(row) + val endDate = extractEnd(row) + val weight = extractWeight(row) + + Seq(eventBuilder[NgapAct](patientId, groupId, value, weight, eventDate, endDate)) + } /** * We extract Ngap acts as a concatenation of three different ways to identify specific ngap acts in the SNDS : @@ -25,10 +73,10 @@ class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[ * * @return concatenation of the three codes */ - override def code: Row => 
String = (row: Row) => { - row.getAs[Int](ColNames.NaturePrestation).toString + "_" + - row.getAs[String](ngapKeyLetter) + "_" + + def extractValue(row: Row): String = { + s"${row.getAs[Int](ColNames.NaturePrestation)}_${row.getAs[String](ngapKeyLetterCol)}_${ row.getAs[Double](ColNames.NgapCoefficient).toString + }" } override def extractGroupId(r: Row): String = { @@ -54,75 +102,11 @@ class DcirNgapActExtractor(ngapActsConfig: NgapActConfig) extends DcirExtractor[ } } - def getGHS(r: Row): Double = r.getAs[Double](ColNames.GHSCode) - - def getInstitutionCode(r: Row): Double = r.getAs[Double](ColNames.InstitutionCode) - - def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) - - override def extractWeight(r: Row): Double = 1.0 - - override def extract( - sources: Sources, - codes: Set[String]) - (implicit ctag: TypeTag[NgapAct]): Dataset[Event[NgapAct]] = { - - val input: DataFrame = getInput(sources) - - import input.sqlContext.implicits._ - - { - if (ngapActsConfig.actsCategories.isEmpty) { - input.filter(isInExtractorScope _) - } - else { - input.filter(isInExtractorScope _).filter(isInStudy(codes) _) - } - }.flatMap(builder _).distinct() - } - - override def getInput(sources: Sources): DataFrame = { + private def getGHS(r: Row): Double = r.getAs[Double](ColNames.GHSCode) - val neededColumns: List[Column] = List( - ColNames.PatientID, ColNames.NaturePrestation, ColNames.NgapCoefficient, - ColNames.Date, ColNames.ExecPSNum, ColNames.DcirFluxDate, ngapKeyLetter, - ColNames.Sector, ColNames.GHSCode, ColNames.InstitutionCode - ).map(colName => col(colName)) + private def getInstitutionCode(r: Row): Double = r.getAs[Double](ColNames.InstitutionCode) - lazy val irNat = sources.irNat.get - lazy val dcir = sources.dcir.get - - lazy val df: DataFrame = dcir.join(irNat, dcir("PRS_NAT_REF").cast("String") === irNat("PRS_NAT")) - df.select(neededColumns: _*) - } + private def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) - override def 
isInExtractorScope(row: Row): Boolean = { - !row.isNullAt(row.fieldIndex(ngapKeyLetter)) - } - - override def isInStudy(codes: Set[String])(row: Row): Boolean = { - dcirIsInCategory( - ngapActsConfig.actsCategories, - row - ) - } - - def dcirIsInCategory( - categories: List[NgapActClassConfig], - row: Row): Boolean = { - - val ngapKeyLetter: String = row.getAs[String]("PRS_NAT_CB2") - val ngapCoefficient: String = row.getAs[Double]("PRS_ACT_CFT").toString - val prsNatRef: String = row.getAs[Int]("PRS_NAT_REF").toString - - categories - .exists( - category => - ( - category.ngapKeyLetters.contains(ngapKeyLetter) && - category.ngapCoefficients.contains(ngapCoefficient) - ) || - category.ngapPrsNatRefs.contains(prsNatRef) - ) - } + override def getCodes: NgapActConfig[NgapWithNatClassConfig] = ngapActsConfig } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala index b323f6e8..145f69ef 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala @@ -1,108 +1,81 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts -import scala.reflect.runtime.universe._ import scala.util.Try +import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Dataset, Row} -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor +import fr.polytechnique.cmap.cnam.etl.events.{Event, EventBuilder, McoCeFbstcNgapAct, McoCeFcstcNgapAct, NgapAct} +import fr.polytechnique.cmap.cnam.etl.extractors.Extractor +import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeRowExtractor import 
fr.polytechnique.cmap.cnam.etl.sources.Sources -trait McoCeNgapActExtractor extends McoCeExtractor[NgapAct] { - val ngapActsConfig: NgapActConfig +sealed abstract class McoCeNgapActExtractor(ngapActsConfig: NgapActConfig[NgapActClassConfig]) extends Extractor[NgapAct, NgapActConfig[NgapActClassConfig]] + with McoCeRowExtractor { + // abstract values for implementing classes val keyLetterColumn: String val coeffColumn: String + val eventBuilder: EventBuilder - val columnName: String = keyLetterColumn + // Implementation of the EventRowExtractor + override def usedColumns: List[String] = super.usedColumns ++ List(keyLetterColumn, coeffColumn) - override def code: Row => String = (row: Row) => { - val coeff = Try(row.getAs[Double](coeffColumn).toString) recover { - case _: NullPointerException => "0" - } - "PmsiCe_" + row.getAs[String](keyLetterColumn) + "_" + coeff.get + def extractValue(row: Row): String = { + val letter = getNgapLetter(row) + val coeff = getNgapCoeff(row) + s"PmsiCe_${letter}_${coeff}" } - override def extract( - sources: Sources, - codes: Set[String]) - (implicit ctag: TypeTag[NgapAct]): Dataset[Event[NgapAct]] = { - - val input: DataFrame = getInput(sources) + // Implementation of the Extractor Trait + override def getCodes: NgapActConfig[NgapActClassConfig] = ngapActsConfig - import input.sqlContext.implicits._ + override def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(keyLetterColumn)) - { - if (ngapActsConfig.actsCategories.isEmpty) { - input.filter(isInExtractorScope _) - } - else { - input.filter(isInExtractorScope _).filter(isInStudy(codes) _) - } - }.flatMap(builder _).distinct() + override def isInStudy(row: Row): Boolean = { + lazy val letter = getNgapLetter(row) + lazy val coeff = getNgapCoeff(row) + ngapActsConfig.actsCategories.exists(category => ngapIsInCategory(category, letter, coeff)) } - override def isInStudy(codes: Set[String])(row: Row): Boolean = { - pmsiIsInCategories( - 
ngapActsConfig.actsCategories, - keyLetterColumn, - coeffColumn, - row - ) - } + private def ngapIsInCategory(category: NgapActClassConfig, ngapLetter: => String, ngapCoeff: => String): Boolean = + category.ngapKeyLetters.contains(ngapLetter) && { + category.ngapCoefficients.isEmpty || category.ngapCoefficients.contains(ngapCoeff) + } - /** User could be interested by different Ngap categories each defined by a list of key letters - * and a list of coefficients. This function iterates over each category. More détails in the NgapActConfig class. - * - * @param categories : A list of Ngap prestation and coefficient codes - * @param ngapKeyColumn : the Ngap prestation code for MCO CE - * @param ngapCoeffColumn : the Ngap coefficient which complete the prestation code for MCO CE - * @param row - * @return - */ - def pmsiIsInCategories( - categories: List[NgapActClassConfig], - ngapKeyColumn: String, - ngapCoeffColumn: String, - row: Row): Boolean = { - - val letter = row.getAs[String](ngapKeyColumn) - val coeff = Try(row.getAs[Double](ngapCoeffColumn).toString) recover { + private def getNgapLetter(row: Row): String = row.getAs[String](keyLetterColumn) + private def getNgapCoeff(row: Row): String = { + Try(row.getAs[Double](coeffColumn).toString) recover { case _: NullPointerException => "0" } + }.get - categories - .exists(category => pmsiIsInCategory(category, letter, coeff.get)) - } - def pmsiIsInCategory( - category: NgapActClassConfig, - ngapLetter: String, - ngapCoeff: String): Boolean = { - if (category.ngapCoefficients.isEmpty) { - category.ngapKeyLetters.contains(ngapLetter) - } - else { - category.ngapCoefficients.contains(ngapCoeff) && - category.ngapKeyLetters.contains(ngapLetter) - } - } + def builder(row: Row): Seq[Event[NgapAct]] = { + val patientId = extractPatientId(row) + val groupId = extractGroupId(row) + val value = extractValue(row) + val eventDate = extractStart(row) + val endDate = extractEnd(row) + val weight = extractWeight(row) - override def 
getInput(sources: Sources): DataFrame = { - sources.mcoCe.get.select((coeffColumn :: keyLetterColumn :: ColNames.core).map(col): _*) + Seq(eventBuilder[NgapAct](patientId, groupId, value, weight, eventDate, endDate)) } + + override def getInput(sources: Sources): DataFrame = sources.mcoCe.get.select(usedColumns.map(col): _*) } -class McoCeFbstcNgapActExtractor(ngapConfig: NgapActConfig) extends McoCeNgapActExtractor { +final case class McoCeFbstcNgapActExtractor(ngapConfig: NgapActConfig[NgapActClassConfig]) extends McoCeNgapActExtractor(ngapConfig) { val keyLetterColumn: String = ColNames.NgapKeyLetterFbstc - override val columnName: String = keyLetterColumn + val coeffColumn: String = ColNames.NgapCoefficientFbstc override val eventBuilder: EventBuilder = McoCeFbstcNgapAct val ngapActsConfig: NgapActConfig = ngapConfig val coeffColumn: String = ColNames.NgapCoefficientFbstc } -class McoCeFcstcNgapActExtractor(ngapConfig: NgapActConfig) extends McoCeNgapActExtractor { +final case class McoCeFcstcNgapActExtractor(ngapConfig: NgapActConfig[NgapActClassConfig]) extends McoCeNgapActExtractor(ngapConfig) { val keyLetterColumn: String = ColNames.NgapKeyLetterFcstc - override val columnName: String = keyLetterColumn + val coeffColumn: String = ColNames.NgapCoefficientFcstc + override val eventBuilder: EventBuilder = McoCeFcstcNgapAct val ngapActsConfig: NgapActConfig = ngapConfig val coeffColumn: String = ColNames.NgapCoefficientFcstc diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala index 0bfe63dc..ee2da23f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala @@ -1,12 +1,20 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts -trait 
NgapActClassConfig extends Serializable { - """ - ngapCoefficients should always be specified with the dot separation for float, as this is how they are coded in the snds. - eg: "2.0" should be used instead of "2" - """.stripMargin - //val name: String - val ngapKeyLetters: Seq[String] - val ngapCoefficients: Seq[String] - val ngapPrsNatRefs: Seq[String] = Seq() +//ngapCoefficients should always be specified with the dot separation for float, as this is how they are coded in the snds. +// eg: "2.0" should be used instead of "2" +class NgapActClassConfig( + val ngapKeyLetters: Seq[String], + val ngapCoefficients: Seq[String]) extends Serializable + +object NgapActClassConfig { + def apply(ngapKeyLetters: Seq[String], ngapCoefficients: Seq[String]): NgapActClassConfig = + new NgapActClassConfig(ngapKeyLetters,ngapCoefficients) } + +// If your Extractor add the Information from IR_NAT_V reference table use this. +class NgapWithNatClassConfig( + override val ngapKeyLetters: Seq[String], + override val ngapCoefficients: Seq[String], + val ngapPrsNatRefs: Seq[String]) extends NgapActClassConfig(ngapKeyLetters, ngapCoefficients) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala index b566999d..86ede2ed 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala @@ -1,6 +1,8 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts -import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig +import fr.polytechnique.cmap.cnam.etl.extractors.{ExtractorCodes, ExtractorConfig} /** * NgapActConfig defines three different ways to filter for specific ngap acts in the SNDS : @@ -22,12 +24,12 @@ import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig * * @param 
actsCategories List of configuration to get specific NgapActs */ -class NgapActConfig( - val actsCategories: List[NgapActClassConfig]) extends ExtractorConfig with Serializable { +class NgapActConfig[+C <: NgapActClassConfig]( + val actsCategories: List[C]) extends ExtractorConfig with ExtractorCodes { + override def isEmpty: Boolean = actsCategories.isEmpty } object NgapActConfig { - def apply(actsCategories: List[NgapActClassConfig]): NgapActConfig = new NgapActConfig( - actsCategories - ) + + def apply[C <: NgapActClassConfig](actsCategories: List[C]): NgapActConfig[C] = new NgapActConfig[C](actsCategories) } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala index 629ce0fc..5d125fd7 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala @@ -2,10 +2,10 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients +import org.apache.spark.sql.{Column, DataFrame, Dataset} import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import org.apache.spark.sql.{Column, DataFrame, Dataset} import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientUtils._ import fr.polytechnique.cmap.cnam.etl.patients.Patient diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala index adf88d75..61dfcb15 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala @@ -1,8 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import 
fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear -import org.apache.spark.sql.functions._ import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.functions._ +import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear private[patients] object HadPatients { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala index b30c183f..6503955a 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala @@ -2,9 +2,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients +import org.apache.spark.sql.{Column, DataFrame, Dataset} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.TimestampType -import org.apache.spark.sql.{Column, DataFrame, Dataset} import fr.polytechnique.cmap.cnam.etl.patients.Patient import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala index 6b0bdc57..2a5dfeea 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala @@ -2,8 +2,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import org.apache.spark.sql.functions._ import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.functions._ import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear private[patients] object McoPatients { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala index 02963d06..13bb1f6e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala @@ -2,10 +2,10 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients +import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, IntegerType, TimestampType} -import org.apache.spark.sql.{DataFrame, Dataset} import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientUtils.estimateBirthDateCol import fr.polytechnique.cmap.cnam.etl.patients.Patient diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala index 3f9f10a0..e80cf7db 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala @@ -3,8 +3,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import java.sql.Timestamp -import org.apache.spark.sql.functions._ import org.apache.spark.sql.{Column, DataFrame, Dataset} +import org.apache.spark.sql.functions._ import fr.polytechnique.cmap.cnam.etl.patients._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.datetime.implicits._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/McoCeSpecialtyExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/McoCeSpecialtyExtractor.scala new file mode 100644 index 00000000..1ce4bf82 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/McoCeSpecialtyExtractor.scala @@ -0,0 +1,36 @@ +// License: BSD 3 clause + 
+package fr.polytechnique.cmap.cnam.etl.extractors.prestations + +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCeFbstcMedicalPractitionerClaim, McoCeFcstcMedicalPractitionerClaim, PractitionerClaimSpeciality} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, IsInStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeBasicExtractor + +/** + * Get specialties of the non medical practitioners in the MCO_CE: + * If a specialty is available, it extracts the specialty using MCO_FBSTC_ _EXE_SPE and MCO_FCSTC_ _EXE_SPE. + * These two columns are complementary as described here : + * https://documentation-snds.health-data-hub.fr/fiches/actes_consult_externes.html#les-tables-du-pmsi-version-snds-pour-les-ace + **/ +sealed abstract class McoCeSpecialtyExtractor(codes: BaseExtractorCodes) extends McoCeBasicExtractor[PractitionerClaimSpeciality] + with IsInStrategy[PractitionerClaimSpeciality] { + override def extractValue(row: Row): String = row.getAs[Int](columnName).toString + + override def isInExtractorScope(row: Row): Boolean = { + (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) + } + + override def getCodes: BaseExtractorCodes = codes +} + +final case class McoCeFbstcSpecialtyExtractor(codes: BaseExtractorCodes) extends McoCeSpecialtyExtractor(codes) { + override val columnName: String = ColNames.PractitionnerSpecialtyFbstc + override val eventBuilder: EventBuilder = McoCeFbstcMedicalPractitionerClaim +} + + +final case class McoCeFcstcSpecialtyExtractor(codes: BaseExtractorCodes) extends McoCeSpecialtyExtractor(codes) { + override val columnName: String = ColNames.PractitionnerSpecialtyFcstc + override val eventBuilder: EventBuilder = McoCeFcstcMedicalPractitionerClaim +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala index bddbf5a1..7837f38d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala @@ -3,24 +3,16 @@ package fr.polytechnique.cmap.cnam.etl.extractors.prestations import java.sql.Timestamp -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeExtractor -import org.apache.spark.sql.{DataFrame, Row} import scala.util.Try -import org.apache.spark.sql.functions.col -import fr.polytechnique.cmap.cnam.etl.sources.Sources +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, IsInStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirBasicExtractor -/** - * Get specialties of medical practitionner in the Dcir: - * If a specialty is available, it extracts the specialty using PSE_SPE_COD and the practitioner - * identifier from the database. 
- */ -object MedicalPractitionerClaimExtractor extends DcirExtractor[PractitionerClaimSpeciality] { - override val columnName: String = ColNames.MSpe - override val eventBuilder: EventBuilder = MedicalPractitionerClaim +sealed abstract class DcirPractitionerSpecialityExtractor(codes: BaseExtractorCodes) + extends DcirBasicExtractor[PractitionerClaimSpeciality] with IsInStrategy[PractitionerClaimSpeciality] { - override def code: Row => String = (row: Row) => row.getAs[Integer](columnName).toString + override def usedColumns: List[ColName] = ColNames.ExecPSNum :: super.usedColumns override def extractStart(r: Row): Timestamp = { Try(super.extractStart(r)) recover { @@ -32,75 +24,34 @@ object MedicalPractitionerClaimExtractor extends DcirExtractor[PractitionerClaim r.getAs[String](ColNames.ExecPSNum) } - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.contains(code(row)) + override def extractValue(row: Row): String = row.getAs[Integer](columnName).toString override def isInExtractorScope(row: Row): Boolean = { (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) } + + override def getCodes: BaseExtractorCodes = codes } /** - * Get specialties of the non medical practitionners in the Dcir: - * If a specialty is available, it extracts the specialty using PSE_ACT_NAT and the practitioner + * Get specialties of medical practitioners in the Dcir: + * If a specialty is available, it extracts the specialty using PSE_SPE_COD and the practitioner * identifier from the database. 
*/ -object NonMedicalPractitionerClaimExtractor extends DcirExtractor[PractitionerClaimSpeciality] { - override val columnName: String = ColNames.NonMSpe - override val eventBuilder: EventBuilder = NonMedicalPractitionerClaim - - override def code: Row => String = (row: Row) => row.getAs[Integer](columnName).toString - - override def extractStart(r: Row): Timestamp = { - Try(super.extractStart(r)) recover { - case _: NullPointerException => extractFluxDate(r) - } - }.get - - override def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.ExecPSNum) - } - - override def isInExtractorScope(row: Row): Boolean = { - (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) - } - - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.contains(code(row)) -} - - -/** - * Get specialties of the non medical practitioners in the MCO_CE: - * If a specialty is available, it extracts the specialty using MCO_FBSTC_ _EXE_SPE and MCO_FCSTC_ _EXE_SPE. 
- * These two columns are complementary as described here : - * https://documentation-snds.health-data-hub.fr/fiches/actes_consult_externes.html#les-tables-du-pmsi-version-snds-pour-les-ace - **/ -trait McoCeSpecialtyExtractor extends McoCeExtractor[PractitionerClaimSpeciality] { +final case class MedicalPractitionerClaimExtractor(codes: BaseExtractorCodes) + extends DcirPractitionerSpecialityExtractor(codes) { + override val columnName: String = ColNames.MSpe override val eventBuilder: EventBuilder = MedicalPractitionerClaim - - override def code: Row => String = (row: Row) => row.getAs[Int](columnName).toString - - - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.contains(code(row)) - - override def isInExtractorScope(row: Row): Boolean = { - (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) - } - - override def getInput(sources: Sources): DataFrame = { - sources.mcoCe.get.select((columnName :: ColNames.core).map(col): _*) - } -} - -object McoCeFbstcSpecialtyExtractor extends McoCeSpecialtyExtractor { - override val columnName: String = ColNames.PractitionnerSpecialtyFbstc - override val eventBuilder: EventBuilder = McoCeFbstcMedicalPractitionerClaim } -object McoCeFcstcSpecialtyExtractor extends McoCeSpecialtyExtractor { - override val columnName: String = ColNames.PractitionnerSpecialtyFcstc - override val eventBuilder: EventBuilder = McoCeFcstcMedicalPractitionerClaim +/** + * Get specialties of the non medical practitioners in the Dcir: + * If a specialty is available, it extracts the specialty using PSE_ACT_NAT and the practitioner + * identifier from the database. 
+ */ +final case class NonMedicalPractitionerClaimExtractor(codes: BaseExtractorCodes) + extends DcirPractitionerSpecialityExtractor(codes) { + override val columnName: String = ColNames.NonMSpe + override val eventBuilder: EventBuilder = NonMedicalPractitionerClaim } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrBasicExtractor.scala new file mode 100644 index 00000000..e33ad4d6 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrBasicExtractor.scala @@ -0,0 +1,13 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.ssr + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.events.AnyEvent +import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait SsrBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with SsrRowExtractor { + def getInput(sources: Sources): DataFrame = sources.ssr.get.estimateStayStartTime.select(neededColumns.map(col): _*) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrExtractor.scala deleted file mode 100644 index 3578dfaa..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrExtractor.scala +++ /dev/null @@ -1,47 +0,0 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.ssr - -import java.sql.Timestamp -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{DataFrame, Row} -import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} -import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} -import fr.polytechnique.cmap.cnam.etl.sources.Sources - -trait SsrExtractor[EventType <: AnyEvent] extends Extractor[EventType] 
with SsrSource with EventRowExtractor { - - val columnName: String - - val eventBuilder: EventBuilder - - def getInput(sources: Sources): DataFrame = sources.ssr.get.select(ColNames.all.map(col): _*).estimateStayStartTime - - def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(code(row).startsWith(_)) - - def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) - - def builder(row: Row): Seq[Event[EventType]] = { - lazy val patientId = extractPatientId(row) - lazy val groupId = extractGroupId(row) - lazy val eventDate = extractStart(row) - lazy val endDate = extractEnd(row) - lazy val weight = extractWeight(row) - - Seq(eventBuilder[EventType](patientId, groupId, code(row), weight, eventDate, endDate)) - } - - def code = (row: Row) => row.getAs[String](columnName) - - def extractPatientId(r: Row): String = { - r.getAs[String](ColNames.PatientID) - } - - override def extractGroupId(r: Row): String = { - r.getAs[String](ColNames.EtaNum) + "_" + - r.getAs[String](ColNames.RhaNum) + "_" + - r.getAs[String](ColNames.RhsNum) + "_" + - r.getAs[Int](ColNames.Year).toString - } - - def extractStart(r: Row): Timestamp = r.getAs[Timestamp](NewColumns.EstimatedStayStart) -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrRowExtractor.scala new file mode 100644 index 00000000..2c761e54 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrRowExtractor.scala @@ -0,0 +1,26 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.ssr + +import java.sql.Timestamp +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} +import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor, ExtractorCodes} +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait 
SsrRowExtractor extends SsrSource with EventRowExtractor { + + override def usedColumns: List[String] = ColNames.core ++ super.usedColumns + + def extractPatientId(r: Row): String = { + r.getAs[String](ColNames.PatientID) + } + + override def extractGroupId(r: Row): String = { + r.getAs[String](ColNames.EtaNum) + "_" + + r.getAs[String](ColNames.RhaNum) + "_" + + r.getAs[String](ColNames.RhsNum) + "_" + + r.getAs[Int](ColNames.Year).toString + } + + def extractStart(r: Row): Timestamp = r.getAs[Timestamp](NewColumns.EstimatedStayStart) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala index 5c4fd4b9..b6efc9e1 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala @@ -1,8 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ssr +import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{LongType, TimestampType} -import org.apache.spark.sql.{Column, DataFrame} import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames import fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp @@ -10,25 +10,36 @@ trait SsrSource extends ColumnNames { final object ColNames extends Serializable { val PatientID: ColName = "NUM_ENQ" + val StayStartMonth: ColName = "MOI_LUN_1S" + val StayStartYear: ColName = "ANN_LUN_1S" + val StayStartDate: ColName = "ENT_DAT" + val StayEndDate: ColName = "SOR_DAT" + val StartDate: ColName = "EXE_SOI_DTD" + val EndDate: ColName = "EXE_SOI_DTF" + val EtaNum: ColName = "ETA_NUM" + val RhaNum: ColName = "RHA_NUM" + val RhsNum: ColName = "RHS_NUM" + val Year: ColName = "year" + + val core = List( + PatientID, StayStartMonth, StayStartYear, StayStartDate, StayStartDate, StayEndDate, StartDate, EndDate, + EtaNum, RhaNum, RhsNum, Year, 
NewColumns.EstimatedStayStart + ) + val StayLength: ColName = "SSR_B__RHS_ANT_SEJ_ENT" val DP: ColName = "SSR_B__MOR_PRP" + val DR: ColName = "SSR_B__ETL_AFF" + val DA: ColName = "SSR_D__DGN_COD" + val CCAM: ColName = "SSR_CCAM__CCAM_ACT" // present only in 2014-2015-2016, should be addeed for the studies on the echantillon + val CSARR: ColName = "SSR_CSARR__CSARR_COD" + val FP_PEC: ColName = "SSR_B__FP_PEC" // MOI_ANN_SOR_SEJ ? //val GHM: ColName = "SSR_B__GRG_GHM" -> GME TODO - val EtaNum: ColName = "ETA_NUM" - val RhaNum: ColName = "RHA_NUM" - val RhsNum: ColName = "RHS_NUM" - val Year: ColName = "year" - val StayStartMonth: ColName = "MOI_LUN_1S" - val StayStartYear: ColName = "ANN_LUN_1S" - val StayLength: ColName = "SSR_B__RHS_ANT_SEJ_ENT" - val StayStartDate: ColName = "ENT_DAT" - val StayEndDate: ColName = "SOR_DAT" - val StartDate: ColName = "EXE_SOI_DTD" - val EndDate: ColName = "EXE_SOI_DTF" + val all = List( PatientID, DP, DR, DA, CCAM, CSARR, FP_PEC, EtaNum, RhaNum, RhsNum, StayLength, //CSARR, StayStartDate, StayEndDate, StartDate, EndDate, Year, StayStartMonth, StayStartYear diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeBasicExtractor.scala new file mode 100644 index 00000000..6b10e010 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeBasicExtractor.scala @@ -0,0 +1,14 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.ssrce + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col +import fr.polytechnique.cmap.cnam.etl.events.AnyEvent +import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait SsrCeBasicExtractor [EventType <: AnyEvent] extends BasicExtractor[EventType] with SsrCeRowExtractor { + def getInput(sources: Sources): DataFrame = 
sources.ssrCe.get.select(neededColumns.map(col): _*) + +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeRowExtractor.scala new file mode 100644 index 00000000..0b640fd1 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeRowExtractor.scala @@ -0,0 +1,15 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.ssrce + +import java.sql.Timestamp +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor + +trait SsrCeRowExtractor extends SsrCeSource with EventRowExtractor { + override def usedColumns: List[String] = ColNames.core ++ super.usedColumns + + override def extractPatientId(row: Row): String = row.getAs[String](ColNames.PatientID) + + override def extractStart(row: Row): Timestamp = row.getAs[Timestamp](ColNames.StartDate) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeSource.scala new file mode 100644 index 00000000..568ba0a1 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeSource.scala @@ -0,0 +1,18 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.ssrce + +import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames + +trait SsrCeSource extends ColumnNames { + final object ColNames extends Serializable { + final lazy val PatientID = "NUM_ENQ" + final lazy val StartDate = "EXE_SOI_DTD" + final lazy val core = List( + PatientID, StartDate + ) + + final lazy val CamCode = "SSR_FMSTC__CCAM_COD" + final lazy val all = List(PatientID, CamCode, StartDate) + } +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOverReasonExtractor.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOverReasonExtractor.scala index e2a62949..e8a83990 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOverReasonExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOverReasonExtractor.scala @@ -1,28 +1,27 @@ package fr.polytechnique.cmap.cnam.etl.extractors.takeOverReasons -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.had.HadExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.takeOverReasons.HadMainTakeOverExtractor.code import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HadAssociatedTakeOver, HadMainTakeOver, MedicalTakeOverReason} +import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, IsInStrategy} +import fr.polytechnique.cmap.cnam.etl.extractors.had.HadBasicExtractor -object HadMainTakeOverExtractor extends HadExtractor[MedicalTakeOverReason] { - - final override val columnName: String = ColNames.PEC_PAL +final case class HadMainTakeOverExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[MedicalTakeOverReason] + with IsInStrategy[MedicalTakeOverReason] { + override val columnName: String = ColNames.PEC_PAL override val eventBuilder: EventBuilder = HadMainTakeOver - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(code(row) == _) -} + override def extractValue(row: Row): String = row.getAs[Int](columnName).toString -object HadAssociatedTakeOverExtractor extends HadExtractor[MedicalTakeOverReason] { + override def getCodes: BaseExtractorCodes = codes +} - final override val columnName: String = ColNames.PEC_ASS +final case class HadAssociatedTakeOverExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[MedicalTakeOverReason] + with IsInStrategy[MedicalTakeOverReason] { + override val columnName: String = ColNames.PEC_ASS 
override val eventBuilder: EventBuilder = HadAssociatedTakeOver + override def extractValue(row: Row): String = row.getAs[Int](columnName).toString - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = codes.exists(code(row) == _) + override def getCodes: BaseExtractorCodes = codes } - - diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala index d2ba80b8..0d8442a0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala @@ -5,12 +5,17 @@ package fr.polytechnique.cmap.cnam.study.bulk import java.io.PrintWriter import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main +import fr.polytechnique.cmap.cnam.etl.extractors.acts.{DcirMedicalActExtractor, McoCcamActExtractor, McoCeCcamActExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.classifications.GhmExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.study.bulk.extractors._ import fr.polytechnique.cmap.cnam.util.reporting.MainMetadata -object BulkMain extends Main { +/*object BulkMain extends Main { override def appName: String = "BulkMain" override def run( @@ -48,4 +53,4 @@ object BulkMain extends Main { None } -} +}*/ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala index 8aec6d60..c692606c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala @@ -61,7 +61,7 @@ 
object FallMain extends Main with FractureCodes { def computeHospitalStays(sources: Sources, fallConfig: FallConfig): mutable.Buffer[OperationMetadata] = { val operationsMetadata = mutable.Buffer[OperationMetadata]() if (fallConfig.runParameters.hospitalStays) { - val hospitalStays = McoHospitalStaysExtractor.extract(sources, Set.empty).cache() + val hospitalStays = McoHospitalStaysExtractor.extract(sources).cache() operationsMetadata += { OperationReporter diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala index 1627e5fa..233fe62a 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala @@ -43,7 +43,7 @@ object FallMainExtract extends Main with FractureCodes { mutable.HashMap[String, OperationMetadata] = { if (fallConfig.runParameters.hospitalStays) { - val hospitalStays = McoHospitalStaysExtractor.extract(sources, Set.empty).cache() + val hospitalStays = McoHospitalStaysExtractor.extract(sources).cache() meta += { "extract_hospital_stays" -> OperationReporter diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala index 758a45be..10c2b2e3 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala @@ -4,17 +4,18 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, Event, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.acts._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import 
fr.polytechnique.cmap.cnam.util.functions.unionDatasets class ActsExtractor(config: MedicalActsConfig) { def extract(sources: Sources): Dataset[Event[MedicalAct]] = { - val dcirMedicalAct = DcirMedicalActExtractor.extract(sources, config.dcirCodes.toSet) + val dcirMedicalAct = DcirMedicalActExtractor(BaseExtractorCodes(config.dcirCodes)).extract(sources) .filter(act => act.groupID != DcirAct.groupID.Unknown) // filter out unknown source acts .filter(act => act.groupID != DcirAct.groupID.PublicAmbulatory) //filter out public amb - val mcoCEMedicalActs = McoCeActExtractor.extract(sources, config.mcoCECodes.toSet) - val mcoMedicalActs = McoCcamActExtractor.extract(sources, config.mcoCCAMCodes.toSet) + val mcoCEMedicalActs = McoCeCcamActExtractor(BaseExtractorCodes(config.mcoCECodes)).extract(sources) + val mcoMedicalActs = McoCcamActExtractor(BaseExtractorCodes(config.mcoCCAMCodes)).extract(sources) unionDatasets(dcirMedicalAct, mcoCEMedicalActs, mcoMedicalActs) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala index b104cb86..c4024851 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala @@ -11,6 +11,6 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources object CardiacExtractor { def extract(sources: Sources): Dataset[Event[Drug]] = { - new DrugExtractor(DrugConfig(TherapeuticLevel, List(Cardiac))).extract(sources, Set.empty) + new DrugExtractor(DrugConfig(TherapeuticLevel, List(Cardiac))).extract(sources) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ControlDrugs.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ControlDrugs.scala index 346a6a05..f26e8ecc 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ControlDrugs.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ControlDrugs.scala @@ -17,6 +17,6 @@ object ControlDrugs { List(Antihypertenseurs, Opioids, Cardiac, ProtonPumpInhibitors, Antiepileptics) ) ) - .extract(sources, Set.empty) + .extract(sources) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala index b7386345..83132c9e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala @@ -4,6 +4,7 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.unionDatasets @@ -12,9 +13,9 @@ class DiagnosisExtractor(config: DiagnosesConfig) { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = MainDiagnosisFallExtractor.extract(sources, config.dpCodes.toSet) - val linkedDiag = LinkedDiagnosisFallExtractor.extract(sources, config.drCodes.toSet) - val dasDiag = AssociatedDiagnosisFallExtractor.extract(sources, config.daCodes.toSet) + val mainDiag = MainDiagnosisFallExtractor(BaseExtractorCodes(config.dpCodes)).extract(sources) + val linkedDiag = LinkedDiagnosisFallExtractor(BaseExtractorCodes(config.drCodes)).extract(sources) + val dasDiag = AssociatedDiagnosisFallExtractor(BaseExtractorCodes(config.daCodes)).extract(sources) unionDatasets(mainDiag, linkedDiag, dasDiag) } diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DrugsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DrugsExtractor.scala index 7390f842..2b0e589a 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DrugsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DrugsExtractor.scala @@ -9,7 +9,6 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources class DrugsExtractor(drugConfig: DrugConfig) { - def extract(sources: Sources): Dataset[Event[Drug]] = { - new DrugExtractor(drugConfig).extract(sources, Set.empty) - } + def extract(sources: Sources): Dataset[Event[Drug]] = DrugExtractor(drugConfig).extract(sources) + } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/EpilepticsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/EpilepticsExtractor.scala index 4584adad..ba5c0bd3 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/EpilepticsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/EpilepticsExtractor.scala @@ -4,16 +4,17 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{ImbDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{ImbCimDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.unionDatasets object EpilepticsExtractor { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = McoMainDiagnosisExtractor.extract(sources, Set("G40")) - val linkedDiag = 
McoLinkedDiagnosisExtractor.extract(sources, Set("G40")) - val imbDiag = ImbDiagnosisExtractor.extract(sources, Set("G40")) + val mainDiag = McoMainDiagnosisExtractor(BaseExtractorCodes(List("G40"))).extract(sources) + val linkedDiag = McoLinkedDiagnosisExtractor(BaseExtractorCodes(List("G40"))).extract(sources) + val imbDiag = ImbCimDiagnosisExtractor(BaseExtractorCodes(List("G40"))).extract(sources) unionDatasets(mainDiag, linkedDiag, imbDiag) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala index a1f54674..5e468855 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala @@ -11,6 +11,6 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources object HTAExtractor { def extract(sources: Sources): Dataset[Event[Drug]] = { - new DrugExtractor(DrugConfig(TherapeuticLevel, List(Antihypertenseurs))).extract(sources, Set.empty) + new DrugExtractor(DrugConfig(TherapeuticLevel, List(Antihypertenseurs))).extract(sources) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala index abedb0ce..a520d1c7 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala @@ -6,11 +6,11 @@ import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.ProtonPumpInhibitors -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.MoleculeCombinationLevel +import 
fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.TherapeuticLevel import fr.polytechnique.cmap.cnam.etl.sources.Sources object IPPExtractor { def extract(sources: Sources): Dataset[Event[Drug]] = { - new DrugExtractor(DrugConfig(MoleculeCombinationLevel, List(ProtonPumpInhibitors))).extract(sources, Set.empty) + new DrugExtractor(DrugConfig(TherapeuticLevel, List(ProtonPumpInhibitors))).extract(sources) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala index b564e352..23da2b7a 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala @@ -3,16 +3,24 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.events.Diagnosis +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{McoAssociatedDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoRowExtractor import fr.polytechnique.cmap.cnam.study.fall.fractures.Surgery -trait ClassifyWeight extends McoExtractor[Diagnosis] with Surgery { +trait ClassifyWeight extends Surgery { + self : McoRowExtractor => + /** It gets ExitMode from row. + * + * @param r The row itself. + * @return The value of ExitMode. 
+ */ + def getExit(r: Row): String = r.getAs[String](ColNames.ExitMode) + override def extractWeight(r: Row): Double = { if (!r.isNullAt(r.fieldIndex(ColNames.ExitMode)) && getExit(r).equals("9")) { 4 - } else if (!r.isNullAt(r.fieldIndex(ColNames.CCAM)) && codes.contains(r.getAs[String](ColNames.CCAM))) { + } else if (!r.isNullAt(r.fieldIndex(ColNames.CCAM)) && surgeryCodes.contains(r.getAs[String](ColNames.CCAM))) { 3 } else { 2 @@ -20,8 +28,20 @@ trait ClassifyWeight extends McoExtractor[Diagnosis] with Surgery { } } -object MainDiagnosisFallExtractor extends McoMainDiagnosisExtractor with ClassifyWeight +class MainDiagnosisFallExtractor(codes: BaseExtractorCodes) extends McoMainDiagnosisExtractor(codes) with ClassifyWeight + +object MainDiagnosisFallExtractor { + def apply(codes: BaseExtractorCodes): MainDiagnosisFallExtractor = new MainDiagnosisFallExtractor(codes) +} + +class AssociatedDiagnosisFallExtractor(codes: BaseExtractorCodes) extends McoAssociatedDiagnosisExtractor(codes) with ClassifyWeight + +object AssociatedDiagnosisFallExtractor { + def apply(codes: BaseExtractorCodes): AssociatedDiagnosisFallExtractor = new AssociatedDiagnosisFallExtractor(codes) +} -object AssociatedDiagnosisFallExtractor extends McoAssociatedDiagnosisExtractor with ClassifyWeight +class LinkedDiagnosisFallExtractor(codes: BaseExtractorCodes) extends McoLinkedDiagnosisExtractor(codes) with ClassifyWeight -object LinkedDiagnosisFallExtractor extends McoLinkedDiagnosisExtractor with ClassifyWeight +object LinkedDiagnosisFallExtractor { + def apply(codes: BaseExtractorCodes): LinkedDiagnosisFallExtractor = new LinkedDiagnosisFallExtractor(codes) +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala index bc43495a..6b642310 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala @@ -4,13 +4,13 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.MoleculeCombinationLevel -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.Opioids import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.Opioids +import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.TherapeuticLevel import fr.polytechnique.cmap.cnam.etl.sources.Sources object OpioidsExtractor { def extract(sources: Sources): Dataset[Event[Drug]] = { - new DrugExtractor(DrugConfig(MoleculeCombinationLevel, List(Opioids))).extract(sources, Set.empty) + new DrugExtractor(DrugConfig(TherapeuticLevel, List(Opioids))).extract(sources) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFractures.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFractures.scala index 48fea1ef..07380a3b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFractures.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFractures.scala @@ -26,7 +26,7 @@ object PublicAmbulatoryFractures extends OutcomesTransformer with FractureCodes } def isPublicAmbulatory(event: Event[MedicalAct]): Boolean = { - event.category == McoCEAct.category + event.category == McoCeCcamAct.category } def containsNonHospitalizedCcam(event: Event[MedicalAct]): Boolean = { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/Surgery.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/Surgery.scala index c5689960..b146e699 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/Surgery.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/Surgery.scala @@ -3,7 +3,7 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures trait Surgery { - val codes = Set( + val surgeryCodes = Set( "QAGA004", "QZGA003", "EEGA002", diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala index c9988219..3ac27403 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala @@ -118,7 +118,7 @@ object PioglitazoneMain extends Main { ) } - val rawHospitalStays = McoHospitalStaysExtractor.extract(sources, Set.empty).cache() + val rawHospitalStays = McoHospitalStaysExtractor.extract(sources).cache() operationsMetadata += { OperationReporter.report( "extract_hospital_stays", diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/Diagnoses.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/Diagnoses.scala index 68870447..b82e3fd4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/Diagnoses.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/Diagnoses.scala @@ -4,6 +4,7 @@ package fr.polytechnique.cmap.cnam.study.pioglitazone.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions @@ -12,9 +13,9 @@ class 
Diagnoses(config: DiagnosesConfig) { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = McoMainDiagnosisExtractor.extract(sources, config.dpCodes.toSet) - val linkedDiag = McoLinkedDiagnosisExtractor.extract(sources, config.drCodes.toSet) - val associatedDiag = McoAssociatedDiagnosisExtractor.extract(sources, config.daCodes.toSet) + val mainDiag = McoMainDiagnosisExtractor(BaseExtractorCodes(config.dpCodes)).extract(sources) + val linkedDiag = McoLinkedDiagnosisExtractor(BaseExtractorCodes(config.drCodes)).extract(sources) + val associatedDiag = McoAssociatedDiagnosisExtractor(BaseExtractorCodes(config.daCodes)).extract(sources) //val imbDiag = ImbDiagnosisExtractor.extract(sources, config.imbCodes.toSet) functions.unionDatasets(mainDiag, linkedDiag, associatedDiag)//, imbDiag) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/MedicalActs.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/MedicalActs.scala index d5ff5062..6a629a9f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/MedicalActs.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/MedicalActs.scala @@ -4,6 +4,7 @@ package fr.polytechnique.cmap.cnam.study.pioglitazone.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Event, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.acts._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.unionDatasets @@ -11,10 +12,10 @@ import fr.polytechnique.cmap.cnam.util.functions.unionDatasets class MedicalActs(config: MedicalActsConfig) { def extract(sources: Sources): Dataset[Event[MedicalAct]] = { - val dcirActs = DcirMedicalActExtractor.extract(sources, config.dcirCodes.toSet) - val ccamActs = McoCcamActExtractor.extract(sources, 
config.mcoCCAMCodes.toSet) - val cimActs = McoCimMedicalActExtractor.extract(sources, config.mcoCIMCodes.toSet) + val dcirActs = DcirMedicalActExtractor(BaseExtractorCodes(config.dcirCodes)).extract(sources) + val ccamActs = McoCcamActExtractor(BaseExtractorCodes(config.mcoCCAMCodes)).extract(sources) + //val cimActs = McoCimMedicalActExtractor(BaseExtractorCodes(config.mcoCIMCodes)).extract(sources) - unionDatasets(dcirActs, ccamActs, cimActs) + unionDatasets(dcirActs, ccamActs) //, cimActs } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala index 55660af5..a1e6a873 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala @@ -85,7 +85,7 @@ object RosiglitazoneMain extends Main { ) } - val hospitalStays = McoHospitalStaysExtractor.extract(sources, Set.empty).cache() + val hospitalStays = McoHospitalStaysExtractor.extract(sources).cache() operationsMetadata += { OperationReporter.report( "extract_hospital_stays", diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/extractors/Diagnoses.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/extractors/Diagnoses.scala index c345e815..e748357b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/extractors/Diagnoses.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/extractors/Diagnoses.scala @@ -4,6 +4,7 @@ package fr.polytechnique.cmap.cnam.study.rosiglitazone.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import 
fr.polytechnique.cmap.cnam.util.functions @@ -12,9 +13,9 @@ class Diagnoses(config: DiagnosesConfig) { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = McoMainDiagnosisExtractor.extract(sources, config.dpCodes.toSet) - val linkedDiag = McoLinkedDiagnosisExtractor.extract(sources, config.drCodes.toSet) - val associatedDiag = McoAssociatedDiagnosisExtractor.extract(sources, config.daCodes.toSet) + val mainDiag = McoMainDiagnosisExtractor(BaseExtractorCodes(config.dpCodes)).extract(sources) + val linkedDiag = McoLinkedDiagnosisExtractor(BaseExtractorCodes(config.drCodes)).extract(sources) + val associatedDiag = McoAssociatedDiagnosisExtractor(BaseExtractorCodes(config.daCodes)).extract(sources) functions.unionDatasets(mainDiag, linkedDiag, associatedDiag) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/PrescriptionExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/PrescriptionExtractorSuite.scala index 50af9060..198c7c2f 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/PrescriptionExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/PrescriptionExtractorSuite.scala @@ -6,7 +6,7 @@ import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, EventBuilder, EventCategory} -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirRowExtractor trait MockEvent extends AnyEvent with EventBuilder @@ -17,12 +17,7 @@ object MockEventobject extends MockEvent { class PrescriptionExtractorSuite extends SharedContext { - trait DcirMockExtractor extends DcirExtractor[MockEvent] - - object MockPrescriptionExtractor extends DcirMockExtractor { - override val columnName: String = "" - override val 
eventBuilder: EventBuilder = MockEventobject - } + object MockPrescriptionExtractor extends DcirRowExtractor "extractGroupId" should "return the group ID for done values" in { // Given diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala index 8fc16072..42cc922d 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala @@ -1,250 +1,177 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts -import scala.util.Success - +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types._ import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{BiologyDcirAct, DcirAct, Event, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirSource import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.functions._ -import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema -import org.apache.spark.sql.types._ -import org.scalatest.matchers.should.Matchers.{an, convertToAnyShouldWrapper} -import org.scalatest.TryValues._ +import fr.polytechnique.cmap.cnam.util.functions.makeTS class DcirBiologyActsSuite extends SharedContext { - import DcirBiologyActExtractor.ColNames + val colNames = new DcirSource {}.ColNames val schema = StructType( - StructField(ColNames.PatientID, StringType) :: - StructField(ColNames.BioCode, StringType) :: - StructField(ColNames.InstitutionCode, DoubleType) :: - StructField(ColNames.GHSCode, DoubleType) :: - StructField(ColNames.Sector, DoubleType) :: - StructField(ColNames.Date, DateType) :: Nil + StructField(colNames.PatientID, StringType) :: + 
StructField(colNames.BioCode, StringType) :: + StructField(colNames.InstitutionCode, DoubleType) :: + StructField(colNames.GHSCode, DoubleType) :: + StructField(colNames.Sector, DoubleType) :: + StructField(colNames.FlowDistributionDate, DateType) :: Nil ) val oldSchema = StructType( - StructField(ColNames.PatientID, StringType) :: - StructField(ColNames.BioCode, StringType) :: - StructField(ColNames.Date, DateType) :: Nil + StructField(colNames.PatientID, StringType) :: + StructField(colNames.BioCode, StringType) :: + StructField(colNames.FlowDistributionDate, DateType) :: Nil ) - "isInStudy" should "return true when a study code is found in the row" in { - - // Given - val codes = Set("AAAA", "BBBB") - val inputArray = Array[Any]("Patient_A", "AAAA", null, null, null, makeTS(2010, 1, 1)) - val inputRow = new GenericRowWithSchema(inputArray, schema) - - // When - val result = DcirBiologyActExtractor.isInStudy(codes)(inputRow) - - // Then - assert(result) - } - it should "return false when no code is found in the row" in { // Given - val codes = Set("AAAA", "BBBB") + val codes = BaseExtractorCodes(List("AAAA", "BBBB")) val inputArray = Array[Any]("Patient_A", "CCCC", 1D, 0D, 1D, makeTS(2010, 1, 1)) val inputRow = new GenericRowWithSchema(inputArray, schema) // When - val result = DcirBiologyActExtractor.isInStudy(codes)(inputRow) + val result = DcirBiologyActExtractor(codes).isInStudy(inputRow) // Then assert(!result) } - "builder" should "return a DCIR act if the event is in a older version of DCIR" in { - // Given - val inputArray = Array[Any]("Patient_A", "AAAA", makeTS(2010, 1, 1)) - val inputRow = new GenericRowWithSchema(inputArray, oldSchema) - val expected = Seq(BiologyDcirAct("Patient_A", BiologyDcirAct.groupID.DcirAct, "AAAA", 1.0, makeTS(2010, 1, 1))) - - // When - val result = DcirBiologyActExtractor.builder(inputRow) - - // Then - assert(result == expected) - } - - "getGHS" should "return the value in the correct column" in { - // Given - val schema = 
StructType(StructField(ColNames.GHSCode, DoubleType) :: Nil) - val inputArray = Array[Any](3D) - val input = new GenericRowWithSchema(inputArray, schema) - val expected = 3D - - // When - val result = DcirBiologyActExtractor.getGHS(input) - - // Then - assert(result == expected) - } - - "getSector" should "return the expected value" in { - // Given - val schema = StructType(StructField(ColNames.Sector, DoubleType) :: Nil) - val inputArray = Array[Any](3D) - val input = new GenericRowWithSchema(inputArray, schema) - val expected = 3D - - // When - val result = DcirBiologyActExtractor.getSector(input) - - // Then - assert(result == expected) - } - - "getInstitutionCode" should "return the value in the correct column" in { - // Given - val schema = StructType(StructField(ColNames.InstitutionCode, DoubleType) :: Nil) - val inputArray = Array[Any](52D) - val input = new GenericRowWithSchema(inputArray, schema) - val expected = 52D - - // When - val result = DcirBiologyActExtractor.getInstitutionCode(input) - - // Then - assert(result == expected) - - } - "getGroupID" should "return correct status of private ambulatory" in { // Given val schema = StructType( - StructField(ColNames.GHSCode, DoubleType) :: - StructField(ColNames.Sector, StringType) :: - StructField(ColNames.InstitutionCode, DoubleType) :: Nil + StructField(colNames.GHSCode, DoubleType) :: + StructField(colNames.Sector, StringType) :: + StructField(colNames.InstitutionCode, DoubleType) :: Nil ) val array = Array[Any](0D, 2D, 6D) val input = new GenericRowWithSchema(array, schema) - val expected = Success(DcirAct.groupID.PrivateAmbulatory) // When - val result = DcirBiologyActExtractor.getGroupId(input) + val result = DcirBiologyActExtractor(BaseExtractorCodes(List("AAAA", "BBBB"))).extractGroupId(input) // Then - assert(result == expected) + assert(result == DcirAct.groupID.PrivateAmbulatory) } it should "return Success(PublicAmbulatory) if it is public related" in { // Given - val schema = 
StructType(StructField(ColNames.Sector, StringType) :: Nil) + val schema = StructType(StructField(colNames.Sector, StringType) :: Nil) val array = Array[Any](1D) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor.getGroupId(input) + val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) // Then - result.success.value shouldBe DcirAct.groupID.PublicAmbulatory + assert(result == DcirAct.groupID.PublicAmbulatory) } it should "return Success(Liberal) if it is liberal act" in { // Given val schema = StructType( - StructField(ColNames.Sector, StringType) :: StructField( - ColNames.GHSCode, + StructField(colNames.Sector, StringType) :: StructField( + colNames.GHSCode, StringType ) :: Nil ) val array = Array[Any](null, null) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor.getGroupId(input) + val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) // Then - result.success.value shouldBe DcirAct.groupID.Liberal + assert(result == DcirAct.groupID.Liberal) } it should "return Success(PrivateAmbulatory) if it is private ambulatory act" in { // Given val schema = StructType( - StructField(ColNames.Sector, StringType) :: StructField( - ColNames.GHSCode, + StructField(colNames.Sector, StringType) :: StructField( + colNames.GHSCode, DoubleType - ) :: StructField(ColNames.InstitutionCode, DoubleType) :: Nil + ) :: StructField(colNames.InstitutionCode, DoubleType) :: Nil ) val array = Array[Any](null, 0D, 4D) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor.getGroupId(input) + val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) // Then - result.success.value shouldBe DcirAct.groupID.PrivateAmbulatory + assert(result == DcirAct.groupID.PrivateAmbulatory) } it should "return Success(UnkownSource) if it is an act with unknown source" in { // Given 
val schema = StructType( - StructField(ColNames.Sector, StringType) :: StructField( - ColNames.GHSCode, - DoubleType - ) :: StructField(ColNames.InstitutionCode, DoubleType) :: Nil + StructField(colNames.Sector, StringType) :: + StructField(colNames.GHSCode, DoubleType) :: + StructField(colNames.InstitutionCode, DoubleType) :: + Nil ) val array = Array[Any](null, 1D, 4D) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor.getGroupId(input) + val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) // Then - result.success.value shouldBe DcirAct.groupID.Unknown + assert(result == DcirAct.groupID.Unknown) } - it should "return IllegalArgumentException if the information of source of act is unavailable in DCIR" in { + it should "return default value if the information of source of act is unavailable in DCIR" in { // Given val schema = StructType( - StructField(ColNames.GHSCode, DoubleType) :: - StructField(ColNames.Sector, StringType) :: Nil + StructField(colNames.GHSCode, DoubleType) :: + StructField(colNames.Sector, StringType) :: Nil ) val array = Array[Any](0D, 2D, 6D) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor.getGroupId(input) + val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) // Then - result.failure.exception shouldBe an[IllegalArgumentException] + assert(result == DcirAct.groupID.DcirAct) } - "extract" should "return a Dataset of DCIR Biology Acts" in { + /* "extract" should "return a Dataset of DCIR Biology Acts" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ + val sqlCtx = sqlContext + import sqlCtx.implicits._ - // Given - val codes = Set("AAAA", "CCCC") - - val input = Seq( - ("Patient_A", "AAAA", "CCAM1", Some(makeTS(2010, 1, 1)), None, None, None, makeTS(2010, 1, 1)), - ("Patient_A", "BBBB", "CCAM1", Some(makeTS(2010, 2, 1)), Some(1D), Some(0D), Some(1D), makeTS(2010, 2, 1)), - 
("Patient_B", "CCCC", "CCAM1", Some(makeTS(2010, 3, 1)), None, None, None, makeTS(2010, 3, 1)), - ("Patient_B", "CCCC", "CCAM1", Some(makeTS(2010, 4, 1)), Some(7D), Some(0D), Some(2D), makeTS(2010, 4, 1)), - ("Patient_C", "BBBB", "CCAM1", None, Some(1D), Some(0D), Some(2D), makeTS(2010, 5, 1)) - ).toDF( - ColNames.PatientID, ColNames.BioCode, ColNames.CamCode, ColNames.Date, - ColNames.InstitutionCode, ColNames.GHSCode, ColNames.Sector, ColNames.DcirFluxDate - ) + // Given + val codes = BaseExtractorCodes(List("AAAA", "CCCC")) - val sources = Sources(dcir = Some(input)) + val input = Seq( + ("Patient_A", "AAAA", "CCAM1", Some(makeTS(2010, 1, 1)), None, None, None, makeTS(2010, 1, 1)), + ("Patient_A", "BBBB", "CCAM1", Some(makeTS(2010, 2, 1)), Some(1D), Some(0D), Some(1D), makeTS(2010, 2, 1)), + ("Patient_B", "CCCC", "CCAM1", Some(makeTS(2010, 3, 1)), None, None, None, makeTS(2010, 3, 1)), + ("Patient_B", "CCCC", "CCAM1", Some(makeTS(2010, 4, 1)), Some(7D), Some(0D), Some(2D), makeTS(2010, 4, 1)), + ("Patient_C", "BBBB", "CCAM1", None, Some(1D), Some(0D), Some(2D), makeTS(2010, 5, 1)) + ).toDF( + colNames.PatientID, colNames.BioCode, colNames.CamCode, colNames.FlowDistributionDate, + colNames.InstitutionCode, colNames.GHSCode, colNames.Sector, colNames.DcirFluxDate + ) - val expected = Seq[Event[MedicalAct]]( - BiologyDcirAct("Patient_A", BiologyDcirAct.groupID.Liberal, "AAAA", 1.0, makeTS(2010, 1, 1)), - BiologyDcirAct("Patient_B", BiologyDcirAct.groupID.Liberal, "CCCC", 1.0, makeTS(2010, 3, 1)), - BiologyDcirAct("Patient_B", BiologyDcirAct.groupID.PrivateAmbulatory, "CCCC", 1.0, makeTS(2010, 4, 1)) - ).toDS + val sources = Sources(dcir = Some(input)) - // When - val result = DcirBiologyActExtractor.extract(sources, codes) + val expected = Seq[Event[MedicalAct]]( + BiologyDcirAct("Patient_A", BiologyDcirAct.groupID.Liberal, "AAAA", 1.0, makeTS(2010, 1, 1)), + BiologyDcirAct("Patient_B", BiologyDcirAct.groupID.Liberal, "CCCC", 1.0, makeTS(2010, 3, 1)), + 
BiologyDcirAct("Patient_B", BiologyDcirAct.groupID.PrivateAmbulatory, "CCCC", 1.0, makeTS(2010, 4, 1)) + ).toDS - // Then - assertDSs(result, expected) - } + // When + val result = DcirBiologyActExtractor(codes).extract(sources) + + // Then + assertDSs(result, expected) + }*/ "extract" should "return a Dataset of DCIR Biology Acts from raw data" in { @@ -252,18 +179,18 @@ class DcirBiologyActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val codes = Set("238") + val codes = BaseExtractorCodes(List("238")) val input = sqlCtx.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") val sources = Sources(dcir = Some(input)) val expected = Seq[Event[MedicalAct]]( - BiologyDcirAct("Patient_01", BiologyDcirAct.groupID.Liberal, "238", 1.0, makeTS(2006, 1, 15)) + BiologyDcirAct("Patient_01", BiologyDcirAct.groupID.Liberal, "238", 0.0, makeTS(2006, 1, 15)) ).toDS // When - val result = DcirBiologyActExtractor.extract(sources, codes) + val result = DcirBiologyActExtractor(codes).extract(sources) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala index 082de8b7..9350a843 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala @@ -2,249 +2,62 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts -import scala.util.Success -import org.scalatest.matchers.should.Matchers.{an, convertToAnyShouldWrapper} -import org.scalatest.TryValues._ -import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types._ import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, Event, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import 
fr.polytechnique.cmap.cnam.util.functions._ +import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirSource class DcirMedicalActsSuite extends SharedContext { - import DcirMedicalActExtractor.ColNames + val colNames = new DcirSource {}.ColNames val schema = StructType( - StructField(ColNames.PatientID, StringType) :: - StructField(ColNames.CamCode, StringType) :: - StructField(ColNames.InstitutionCode, DoubleType) :: - StructField(ColNames.GHSCode, DoubleType) :: - StructField(ColNames.Sector, DoubleType) :: - StructField(ColNames.Date, DateType) :: Nil + StructField(colNames.PatientID, StringType) :: + StructField(colNames.CamCode, StringType) :: + StructField(colNames.InstitutionCode, DoubleType) :: + StructField(colNames.GHSCode, DoubleType) :: + StructField(colNames.Sector, DoubleType) :: + StructField(colNames.FlowDistributionDate, DateType) :: Nil ) val oldSchema = StructType( - StructField(ColNames.PatientID, StringType) :: - StructField(ColNames.CamCode, StringType) :: - StructField(ColNames.Date, DateType) :: Nil + StructField(colNames.PatientID, StringType) :: + StructField(colNames.CamCode, StringType) :: + StructField(colNames.FlowDistributionDate, DateType) :: Nil ) - "isInStudy" should "return true when a study code is found in the row" in { - - // Given - val codes = Set("AAAA", "BBBB") - val inputArray = Array[Any]("Patient_A", "AAAA", null, null, null, makeTS(2010, 1, 1)) - val inputRow = new GenericRowWithSchema(inputArray, schema) - - // When - val result = DcirMedicalActExtractor.isInStudy(codes)(inputRow) - - // Then - assert(result) - } - - it should "return false when no code is found in the row" in { - - // Given - val codes = Set("AAAA", "BBBB") - val inputArray = Array[Any]("Patient_A", "CCCC", 1D, 0D, 1D, makeTS(2010, 1, 1)) - val inputRow = new GenericRowWithSchema(inputArray, schema) - - // When - val result = DcirMedicalActExtractor.isInStudy(codes)(inputRow) - - // Then - assert(!result) - } - - "builder" should "return a DCIR act 
if the event is in a older version of DCIR" in { - // Given - val inputArray = Array[Any]("Patient_A", "AAAA", makeTS(2010, 1, 1)) - val inputRow = new GenericRowWithSchema(inputArray, oldSchema) - val expected = Seq(DcirAct("Patient_A", DcirAct.groupID.DcirAct, "AAAA", 1.0, makeTS(2010, 1, 1))) - - // When - val result = DcirMedicalActExtractor.builder(inputRow) - - // Then - assert(result == expected) - } - - "getGHS" should "return the value in the correct column" in { - // Given - val schema = StructType(StructField(ColNames.GHSCode, DoubleType) :: Nil) - val inputArray = Array[Any](3D) - val input = new GenericRowWithSchema(inputArray, schema) - val expected = 3D - - // When - val result = DcirMedicalActExtractor.getGHS(input) - - // Then - assert(result == expected) - } - - "getSector" should "return the expected value" in { - // Given - val schema = StructType(StructField(ColNames.Sector, DoubleType) :: Nil) - val inputArray = Array[Any](3D) - val input = new GenericRowWithSchema(inputArray, schema) - val expected = 3D - - // When - val result = DcirMedicalActExtractor.getSector(input) - - // Then - assert(result == expected) - } - - "getInstitutionCode" should "return the value in the correct column" in { - // Given - val schema = StructType(StructField(ColNames.InstitutionCode, DoubleType) :: Nil) - val inputArray = Array[Any](52D) - val input = new GenericRowWithSchema(inputArray, schema) - val expected = 52D - - // When - val result = DcirMedicalActExtractor.getInstitutionCode(input) - - // Then - assert(result == expected) - - } - - "getGroupID" should "return correct status of private ambulatory" in { - // Given - val schema = StructType( - StructField(ColNames.GHSCode, DoubleType) :: - StructField(ColNames.Sector, StringType) :: - StructField(ColNames.InstitutionCode, DoubleType) :: Nil - ) - val array = Array[Any](0D, 2D, 6D) - val input = new GenericRowWithSchema(array, schema) - val expected = Success(DcirAct.groupID.PrivateAmbulatory) - - // When 
- val result = DcirMedicalActExtractor.getGroupId(input) - - // Then - assert(result == expected) - - } - - it should "return Success(PublicAmbulatory) if it is public related" in { - // Given - val schema = StructType(StructField(ColNames.Sector, StringType) :: Nil) - val array = Array[Any](1D) - val input = new GenericRowWithSchema(array, schema) - // When - val result = DcirMedicalActExtractor.getGroupId(input) - - // Then - result.success.value shouldBe DcirAct.groupID.PublicAmbulatory - } - - it should "return Success(Liberal) if it is liberal act" in { - // Given - val schema = StructType( - StructField(ColNames.Sector, StringType) :: StructField( - ColNames.GHSCode, - StringType - ) :: Nil - ) - val array = Array[Any](null, null) - val input = new GenericRowWithSchema(array, schema) - // When - val result = DcirMedicalActExtractor.getGroupId(input) - - // Then - result.success.value shouldBe DcirAct.groupID.Liberal - } - - it should "return Success(PrivateAmbulatory) if it is private ambulatory act" in { - // Given - val schema = StructType( - StructField(ColNames.Sector, StringType) :: StructField( - ColNames.GHSCode, - DoubleType - ) :: StructField(ColNames.InstitutionCode, DoubleType) :: Nil - ) - val array = Array[Any](null, 0D, 4D) - val input = new GenericRowWithSchema(array, schema) - // When - val result = DcirMedicalActExtractor.getGroupId(input) - - // Then - result.success.value shouldBe DcirAct.groupID.PrivateAmbulatory - } - - it should "return Success(UnkownSource) if it is an act with unknown source" in { - // Given - val schema = StructType( - StructField(ColNames.Sector, StringType) :: StructField( - ColNames.GHSCode, - DoubleType - ) :: StructField(ColNames.InstitutionCode, DoubleType) :: Nil - ) - val array = Array[Any](null, 1D, 4D) - val input = new GenericRowWithSchema(array, schema) - // When - val result = DcirMedicalActExtractor.getGroupId(input) - - // Then - result.success.value shouldBe DcirAct.groupID.Unknown - } - - it should 
"return IllegalArgumentException if the information of source of act is unavailable in DCIR" in { - // Given - val schema = StructType( - StructField(ColNames.GHSCode, DoubleType) :: - StructField(ColNames.Sector, StringType) :: Nil - ) - val array = Array[Any](0D, 2D, 6D) - val input = new GenericRowWithSchema(array, schema) - // When - val result = DcirMedicalActExtractor.getGroupId(input) - - // Then - result.failure.exception shouldBe an[IllegalArgumentException] - } - - "extract" should "return a Dataset of DCIR Medical Acts" in { - - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - // Given - val codes = Set("AAAA", "CCCC", "DDDD") - - val input = Seq( - ("Patient_A", "AAAA", "NABM1", makeTS(2010, 1, 1), None, None, None), - ("Patient_A", "BBBB", "NABM1", makeTS(2010, 2, 1), Some(1D), Some(0D), Some(1D)), - ("Patient_B", "CCCC", "NABM1", makeTS(2010, 3, 1), None, None, None), - ("Patient_B", "CCCC", "NABM1", makeTS(2010, 4, 1), Some(7D), Some(0D), Some(2D)), - ("Patient_C", "BBBB", "NABM1", makeTS(2010, 5, 1), Some(1D), Some(0D), Some(2D)), - ("Patient_D", "DDDD", "NABM1", null, None, None, None) - ).toDF( - ColNames.PatientID, ColNames.CamCode, ColNames.BioCode, ColNames.Date, - ColNames.InstitutionCode, ColNames.GHSCode, ColNames.Sector - ) - - val sources = Sources(dcir = Some(input)) - - val expected = Seq[Event[MedicalAct]]( - DcirAct("Patient_A", DcirAct.groupID.Liberal, "AAAA", 1.0, makeTS(2010, 1, 1)), - DcirAct("Patient_B", DcirAct.groupID.Liberal, "CCCC", 1.0, makeTS(2010, 3, 1)), - DcirAct("Patient_B", DcirAct.groupID.PrivateAmbulatory, "CCCC", 1.0, makeTS(2010, 4, 1)), - DcirAct("Patient_D", DcirAct.groupID.Liberal, "DDDD", 1.0, makeTS(1970, 1, 1)) - ).toDS - - // When - val result = DcirMedicalActExtractor.extract(sources, codes) - - // Then - assertDSs(result, expected) - } + /* "extract" should "return a Dataset of DCIR Medical Acts" in { + + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val codes = 
BaseExtractorCodes(List("AAAA", "CCCC", "DDDD")) + + val input = Seq( + ("Patient_A", "AAAA", "NABM1", makeTS(2010, 1, 1), None, None, None), + ("Patient_A", "BBBB", "NABM1", makeTS(2010, 2, 1), Some(1D), Some(0D), Some(1D)), + ("Patient_B", "CCCC", "NABM1", makeTS(2010, 3, 1), None, None, None), + ("Patient_B", "CCCC", "NABM1", makeTS(2010, 4, 1), Some(7D), Some(0D), Some(2D)), + ("Patient_C", "BBBB", "NABM1", makeTS(2010, 5, 1), Some(1D), Some(0D), Some(2D)), + ("Patient_D", "DDDD", "NABM1", null, None, None, None) + ).toDF( + colNames.PatientID, colNames.CamCode, colNames.BioCode, colNames.FlowDistributionDate, + colNames.InstitutionCode, colNames.GHSCode, colNames.Sector + ) + + val sources = Sources(dcir = Some(input)) + + val expected = Seq[Event[MedicalAct]]( + DcirAct("Patient_A", DcirAct.groupID.Liberal, "AAAA", 1.0, makeTS(2010, 1, 1)), + DcirAct("Patient_B", DcirAct.groupID.Liberal, "CCCC", 1.0, makeTS(2010, 3, 1)), + DcirAct("Patient_B", DcirAct.groupID.PrivateAmbulatory, "CCCC", 1.0, makeTS(2010, 4, 1)), + DcirAct("Patient_D", DcirAct.groupID.Liberal, "DDDD", 1.0, makeTS(1970, 1, 1)) + ).toDS + + // When + val result = DcirMedicalActExtractor(codes).extract(sources) + + // Then + assertDSs(result, expected) + }*/ } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadMedicalActsSuite.scala index 539089e6..45d79e6e 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadMedicalActsSuite.scala @@ -1,18 +1,19 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.events.{Event, HadCCAMAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import 
fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.functions._ +import fr.polytechnique.cmap.cnam.util.functions.makeTS class HadMedicalActsSuite extends SharedContext { - + "extract" should "return a DataSet of HadCCAMActs" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val ccamCodes = Set("HPQD001") + val ccamCodes = BaseExtractorCodes(List("HPQD001")) val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") val expected = Seq[Event[MedicalAct]]( HadCCAMAct("patient02", "10000201_30000150_2019", "HPQD001", makeTS(2019, 12, 24)), @@ -21,7 +22,7 @@ class HadMedicalActsSuite extends SharedContext { val input = Sources(had = Some(had)) // When - val result = HadCcamActExtractor.extract(input, ccamCodes) + val result = HadCcamActExtractor(ccamCodes).extract(input) // Then assertDSs(result, expected) @@ -41,7 +42,7 @@ class HadMedicalActsSuite extends SharedContext { val input = Sources(had = Some(had)) // When - val result = HadCcamActExtractor.extract(input, Set.empty) + val result = HadCcamActExtractor(BaseExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala index 1201e471..d29e680c 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala @@ -2,31 +2,31 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts -import java.sql.Date - import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCEAct, MedicalAct} +import 
fr.polytechnique.cmap.cnam.etl.events.{Event, McoCeCcamAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeSource import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS class McoCEMedicalActsSuite extends SharedContext { "isInStudy" should "return true if row is in study" in { - import McoCeActExtractor.ColNames + val colNames = new McoCeSource {}.ColNames // Given - val codes = Set("coloscopie") + val codes = BaseExtractorCodes(List("coloscopie")) val schema = StructType( - StructField(ColNames.PatientID, StringType) :: - StructField(ColNames.CamCode, StringType) :: - StructField(ColNames.Date, StringType) :: Nil + StructField(colNames.PatientID, StringType) :: + StructField(colNames.CamCode, StringType) :: + StructField(colNames.Date, StringType) :: Nil ) val data = Array[Any]("George", "coloscopie", "23012010") val input = new GenericRowWithSchema(data, schema) // When - val result = McoCeActExtractor.isInStudy(codes)(input) + val result = McoCeCcamActExtractor(codes).isInStudy(input) // Then assert(result) @@ -36,15 +36,15 @@ class McoCEMedicalActsSuite extends SharedContext { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val cim10Codes = Set("DEM") + val cim10Codes = BaseExtractorCodes(List("DEM")) val mcoCe = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") val expected = Seq[Event[MedicalAct]]( - McoCEAct("200410", "190000059_00022621_2014", "DEMP002", makeTS(2014, 4, 18)) + McoCeCcamAct("200410", "190000059_00022621_2014", "DEMP002", makeTS(2014, 4, 18)) ).toDS val input = Sources(mcoCe = Some(mcoCe)) // When - val result = McoCeActExtractor.extract(input, cim10Codes) + val result = McoCeCcamActExtractor(cim10Codes).extract(input) // Then assertDSs(expected, result) @@ -54,17 +54,16 @@ class McoCEMedicalActsSuite extends SharedContext { val sqlCtx = sqlContext import 
sqlCtx.implicits._ // Given - val cim10Codes = Set("DEM") val mcoCe = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") val expected = Seq[Event[MedicalAct]]( - McoCEAct("200410", "190000059_00022621_2014", "DEMP002", makeTS(2014, 4, 18)), - McoCEAct("2004100010", "390780146_00098382_2014", "DZQM006", makeTS(2014, 11, 6)), - McoCEAct("2004100010", "390780146_00015211_2014", "DEQP005", makeTS(2014, 2, 11)) + McoCeCcamAct("200410", "190000059_00022621_2014", "DEMP002", makeTS(2014, 4, 18)), + McoCeCcamAct("2004100010", "390780146_00098382_2014", "DZQM006", makeTS(2014, 11, 6)), + McoCeCcamAct("2004100010", "390780146_00015211_2014", "DEQP005", makeTS(2014, 2, 11)) ).toDS val input = Sources(mcoCe = Some(mcoCe)) // When - val result = McoCeActExtractor.extract(input, Set.empty) + val result = McoCeCcamActExtractor(BaseExtractorCodes.empty).extract(input) // Then assertDSs(expected, result) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala index 0bd1ff89..a75d653f 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala @@ -4,61 +4,18 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ class McoMedicalActsSuite extends SharedContext { - "extract" should "return a DataSet of McoCIM10Act" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - // Given - val cim10Codes = Set("C670") - val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") - val expected = Seq[Event[MedicalAct]]( - 
McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 31)), - McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 31)), - McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 10)) - ).toDS - - val input = Sources(mco = Some(mco)) - // When - val result = McoCimMedicalActExtractor.extract(input, cim10Codes) - - // Then - assertDSs(result, expected) - } - - it should "return all available McoCIM10Act when codes is Empty" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - // Given - val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") - val expected = Seq[Event[MedicalAct]]( - McoCIM10Act("Patient_02", "10000123_10000543_2006", "C671", makeTS(2005, 12, 26)), - McoCIM10Act("Patient_02", "10000123_10000987_2006", "C670", makeTS(2005, 12, 31)), - McoCIM10Act("Patient_02", "10000123_20000123_2007", "C670", makeTS(2007, 1, 31)), - McoCIM10Act("Patient_02", "10000123_20000345_2007", "C671", makeTS(2007, 1, 31)), - McoCIM10Act("Patient_02", "10000123_30000546_2008", "C670", makeTS(2008, 3, 10)), - McoCIM10Act("Patient_02", "10000123_30000852_2008", "C671", makeTS(2008, 3, 17)) - ).toDS - - val input = Sources(mco = Some(mco)) - // When - val result = McoCimMedicalActExtractor.extract(input, Set.empty) - - // Then - assertDSs(result, expected) - } - "extract" should "return a DataSet of McoCCAMActs" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val ccamCodes = Set("AAAA123") + val ccamCodes = BaseExtractorCodes(List("AAAA123")) val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val expected = Seq[Event[MedicalAct]]( McoCCAMAct("Patient_02", "10000123_10000987_2006", "AAAA123", makeTS(2005, 12, 31)), @@ -68,7 +25,7 @@ class McoMedicalActsSuite extends SharedContext { val input = Sources(mco = Some(mco)) // When - val result = McoCcamActExtractor.extract(input, ccamCodes) + val result = McoCcamActExtractor(ccamCodes).extract(input) // 
Then assertDSs(result, expected) @@ -91,7 +48,7 @@ class McoMedicalActsSuite extends SharedContext { val input = Sources(mco = Some(mco)) // When - val result = McoCcamActExtractor.extract(input, Set.empty) + val result = McoCcamActExtractor(BaseExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala index 61bc8224..70543a11 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala @@ -2,32 +2,34 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types.{DateType, StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, MedicalAct, SsrCEAct} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.ssrce.SsrCeSource import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS -import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema -import org.apache.spark.sql.types._ class SsrCEMedicalActsSuite extends SharedContext { - import SsrCeActExtractor.ColNames + val colNames = new SsrCeSource {}.ColNames val schema = StructType( - StructField(ColNames.PatientID, StringType) :: - StructField(ColNames.CamCode, StringType) :: - StructField(ColNames.Date, DateType) :: Nil + StructField(colNames.PatientID, StringType) :: + StructField(colNames.CamCode, StringType) :: + StructField(colNames.StartDate, DateType) :: Nil ) "isInStudy" should "return true when a study code is found in the row" in { // Given - val codes = 
Set("AAAA", "BBBB") + val codes = BaseExtractorCodes(List("AAAA", "BBBB")) val inputArray = Array[Any]("Patient_A", "AAAA", makeTS(2010, 1, 1)) val inputRow = new GenericRowWithSchema(inputArray, schema) // When - val result = SsrCeActExtractor.isInStudy(codes)(inputRow) + val result = SsrCeActExtractor(codes).isInStudy(inputRow) // Then assert(result) @@ -36,12 +38,12 @@ class SsrCEMedicalActsSuite extends SharedContext { it should "return false when no code is found in the row" in { // Given - val codes = Set("AAAA", "BBBB") + val codes = BaseExtractorCodes(List("AAAA", "BBBB")) val inputArray = Array[Any]("Patient_A", "CCCC", makeTS(2010, 1, 1)) val inputRow = new GenericRowWithSchema(inputArray, schema) // When - val result = SsrCeActExtractor.isInStudy(codes)(inputRow) + val result = SsrCeActExtractor(codes).isInStudy(inputRow) // Then assert(!result) @@ -53,7 +55,7 @@ class SsrCEMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val codes = Set("AAAA", "CCCC") + val codes = BaseExtractorCodes(List("AAAA", "CCCC")) val input = Seq( ("Patient_A", "AAAA", makeTS(2010, 1, 1)), @@ -62,19 +64,19 @@ class SsrCEMedicalActsSuite extends SharedContext { ("Patient_B", "CCCC", makeTS(2010, 4, 1)), ("Patient_C", "BBBB", makeTS(2010, 5, 1)) ).toDF( - ColNames.PatientID, ColNames.CamCode, ColNames.Date + colNames.PatientID, colNames.CamCode, colNames.StartDate ) val sources = Sources(ssrCe = Some(input)) val expected = Seq[Event[MedicalAct]]( - SsrCEAct("Patient_A", "ACE", "AAAA", 0.0, makeTS(2010, 1, 1)), - SsrCEAct("Patient_B", "ACE", "CCCC", 0.0, makeTS(2010, 3, 1)), - SsrCEAct("Patient_B", "ACE", "CCCC", 0.0, makeTS(2010, 4, 1)) + SsrCEAct("Patient_A", "NA", "AAAA", 0.0, makeTS(2010, 1, 1)), + SsrCEAct("Patient_B", "NA", "CCCC", 0.0, makeTS(2010, 3, 1)), + SsrCEAct("Patient_B", "NA", "CCCC", 0.0, makeTS(2010, 4, 1)) ).toDS // When - val result = SsrCeActExtractor.extract(sources, codes) + val result = SsrCeActExtractor(codes).extract(sources) 
// Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala index 8587be84..2c38dd60 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala @@ -2,6 +2,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.acts import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ @@ -12,7 +13,7 @@ class SsrMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ccamCodes = Set("AHQP001") + val ccamCodes = BaseExtractorCodes(List("AHQP001")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[MedicalAct]]( SsrCCAMAct("Patient_02", "10000123_30000546_200_2019", "AHQP001", makeTS(2019, 8, 11)), @@ -21,7 +22,7 @@ class SsrMedicalActsSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrCcamActExtractor.extract(input, ccamCodes) + val result = SsrCcamActExtractor(ccamCodes).extract(input) // Then assertDSs(result, expected) @@ -42,7 +43,7 @@ class SsrMedicalActsSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrCcamActExtractor.extract(input, Set.empty) + val result = SsrCcamActExtractor(BaseExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) @@ -53,7 +54,7 @@ class SsrMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ccamCodes = Set("BLR+156") + val ccarrCodes = BaseExtractorCodes(List("BLR+156")) val ssr = 
spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[MedicalAct]]( SsrCSARRAct("Patient_02", "10000123_30000546_200_2019", "BLR+156", makeTS(2019, 8, 11)), @@ -62,7 +63,7 @@ class SsrMedicalActsSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrCsarrActExtractor.extract(input, ccamCodes) + val result = SsrCsarrActExtractor(ccarrCodes).extract(input) // Then assertDSs(result, expected) @@ -83,7 +84,7 @@ class SsrMedicalActsSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrCsarrActExtractor.extract(input, Set.empty) + val result = SsrCsarrActExtractor(BaseExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GHMClassificationsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GHMClassificationsSuite.scala index c87b4990..4f530024 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GHMClassificationsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GHMClassificationsSuite.scala @@ -4,6 +4,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.classifications import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.GHMClassification +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ @@ -15,7 +16,7 @@ class GHMClassificationsSuite extends SharedContext { // Given val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") - val ghmCodes = Set("12H50L") + val ghmCodes = BaseExtractorCodes(List("12H50L")) val expected = Seq( GHMClassification("Patient_02", "10000123_20000123_2007", "12H50L", makeTS(2007, 1, 29)), @@ -25,7 +26,7 @@ class GHMClassificationsSuite extends 
SharedContext { val sources = Sources(mco = Some(mco)) // When - val result = GhmExtractor.extract(sources, ghmCodes) + val result = GhmExtractor(ghmCodes).extract(sources) // Then assertDSs(result, expected) @@ -50,7 +51,7 @@ class GHMClassificationsSuite extends SharedContext { val sources = Sources(mco = Some(mco)) // When - val result = GhmExtractor.extract(sources, Set.empty) + val result = GhmExtractor(BaseExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosesSuite.scala index 0cbb83fd..4fd73886 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosesSuite.scala @@ -1,7 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event, HadAssociatedDiagnosis, HadMainDiagnosis} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -12,7 +13,7 @@ class HadDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val dpCodes = Set("G970") + val dpCodes = BaseExtractorCodes(List("G970")) val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") val sources = Sources(had = Some(had)) @@ -22,7 +23,7 @@ class HadDiagnosesSuite extends SharedContext { ).toDS // When - val result = HadMainDiagnosisExtractor.extract(sources, dpCodes) + val result = HadMainDiagnosisExtractor(dpCodes).extract(sources) // Then assertDSs(result, expected) @@ -43,7 +44,7 @@ class HadDiagnosesSuite extends SharedContext { ).toDS // When - 
val result = HadMainDiagnosisExtractor.extract(sources, Set.empty) + val result = HadMainDiagnosisExtractor(BaseExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) @@ -54,7 +55,7 @@ class HadDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val associatedDiagnosis = Set("G9") + val associatedDiagnosis = BaseExtractorCodes(List("G9")) val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") val sources = Sources(had = Some(had)) @@ -65,7 +66,7 @@ class HadDiagnosesSuite extends SharedContext { ).toDS // When - val result = HadAssociatedDiagnosisExtractor.extract(sources, associatedDiagnosis) + val result = HadAssociatedDiagnosisExtractor(associatedDiagnosis).extract(sources) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala index b1bc657f..e59b2757 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala @@ -3,7 +3,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.ImbDiagnosis +import fr.polytechnique.cmap.cnam.etl.events.ImbCcamDiagnosis +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -15,11 +16,11 @@ class ImbDiagnosesSuite extends SharedContext { // Given val imb = sqlContext.read.load("src/test/resources/test-input/IR_IMB_R.parquet") - val expected = Seq(ImbDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), Some(makeTS(2016, 3, 13)))).toDS + val expected = Seq(ImbCcamDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), Some(makeTS(2016, 
3, 13)))).toDS val sources = Sources(irImb = Some(imb)) // When - val output = ImbDiagnosisExtractor.extract(sources, Set("C67")) + val output = ImbCimDiagnosisExtractor(BaseExtractorCodes(List("C67"))).extract(sources) // Then assertDSs(expected, output) @@ -32,14 +33,14 @@ class ImbDiagnosesSuite extends SharedContext { // Given val imb = sqlContext.read.load("src/test/resources/test-input/IR_IMB_R.parquet") val expected = Seq( - ImbDiagnosis("Patient_02", "E11", makeTS(2006, 1, 25), Some(makeTS(2011, 1, 24))), - ImbDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), Some(makeTS(2016, 3, 13))), - ImbDiagnosis("Patient_02", "9999", makeTS(2006, 4, 25), Some(makeTS(2016, 4, 25))) + ImbCcamDiagnosis("Patient_02", "E11", makeTS(2006, 1, 25), Some(makeTS(2011, 1, 24))), + ImbCcamDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), Some(makeTS(2016, 3, 13))), + ImbCcamDiagnosis("Patient_02", "9999", makeTS(2006, 4, 25), Some(makeTS(2016, 4, 25))) ).toDS val sources = Sources(irImb = Some(imb)) // When - val output = ImbDiagnosisExtractor.extract(sources, Set.empty).orderBy($"start".asc) + val output = ImbCimDiagnosisExtractor(BaseExtractorCodes.empty).extract(sources) // Then assertDSs(expected, output) @@ -59,15 +60,15 @@ class ImbDiagnosesSuite extends SharedContext { ).toDF("NUM_ENQ", "MED_NCL_IDT", "MED_MTF_COD", "IMB_ALD_DTD", "IMB_ALD_DTF") val expected = Seq( - ImbDiagnosis("Patient_02", "E11", makeTS(2006, 1, 25), Some(makeTS(2011, 1, 24))), - ImbDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), None), - ImbDiagnosis("Patient_03", "C67", makeTS(2006, 3, 13), None), - ImbDiagnosis("Patient_02", "9999", makeTS(2006, 4, 25), Some(makeTS(2016, 4, 25))) + ImbCcamDiagnosis("Patient_02", "E11", makeTS(2006, 1, 25), Some(makeTS(2011, 1, 24))), + ImbCcamDiagnosis("Patient_02", "C67", makeTS(2006, 3, 13), None), + ImbCcamDiagnosis("Patient_03", "C67", makeTS(2006, 3, 13), None), + ImbCcamDiagnosis("Patient_02", "9999", makeTS(2006, 4, 25), Some(makeTS(2016, 4, 25))) 
).toDS val sources = Sources(irImb = Some(imb)) // When - val output = ImbDiagnosisExtractor.extract(sources, Set.empty).orderBy($"start".asc) + val output = ImbCimDiagnosisExtractor(BaseExtractorCodes.empty).extract(sources) // Then assertDSs(expected, output) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosesSuite.scala index 48656c1e..bf5103f5 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosesSuite.scala @@ -3,8 +3,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.McoMainDiagnosis -import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event, McoAssociatedDiagnosis, McoLinkedDiagnosis, McoMainDiagnosis} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -15,7 +15,7 @@ class McoDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val dpCodes = Set("C67") + val dpCodes = BaseExtractorCodes(List("C67")) val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val sources = Sources(mco = Some(mco)) @@ -30,7 +30,7 @@ class McoDiagnosesSuite extends SharedContext { // When - val result = McoMainDiagnosisExtractor.extract(sources, dpCodes) + val result = McoMainDiagnosisExtractor(dpCodes).extract(sources) // Then assertDSs(result, expected) @@ -41,7 +41,7 @@ class McoDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val linkedCodes = Set("E05", "E08") + val linkedCodes = BaseExtractorCodes(List("E05", "E08")) val mco = 
spark.read.parquet("src/test/resources/test-input/MCO.parquet") val sources = Sources(mco = Some(mco)) @@ -51,7 +51,7 @@ class McoDiagnosesSuite extends SharedContext { ).toDS // When - val result = McoLinkedDiagnosisExtractor.extract(sources, linkedCodes) + val result = McoLinkedDiagnosisExtractor(linkedCodes).extract(sources) // Then assertDSs(result, expected) @@ -62,7 +62,7 @@ class McoDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val associatedDiagnosis = Set("C66") + val associatedDiagnosis = BaseExtractorCodes(List("C66")) val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val sources = Sources(mco = Some(mco)) @@ -72,7 +72,7 @@ class McoDiagnosesSuite extends SharedContext { ).toDS // When - val result = McoAssociatedDiagnosisExtractor.extract(sources, associatedDiagnosis) + val result = McoAssociatedDiagnosisExtractor(associatedDiagnosis).extract(sources) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala index fcc8e363..bc520a3e 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala @@ -1,7 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event, SsrAssociatedDiagnosis, SsrLinkedDiagnosis, SsrMainDiagnosis, SsrTakingOverPurpose} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -12,7 +13,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val 
dpCodes = Set("C66") + val dpCodes = BaseExtractorCodes(List("C66")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) @@ -24,7 +25,7 @@ class SsrDiagnosesSuite extends SharedContext { // When - val result = SsrMainDiagnosisExtractor.extract(sources, dpCodes) + val result = SsrMainDiagnosisExtractor(dpCodes).extract(sources) // Then assertDSs(result, expected) @@ -46,7 +47,7 @@ class SsrDiagnosesSuite extends SharedContext { // When - val result = SsrMainDiagnosisExtractor.extract(sources, Set.empty) + val result = SsrMainDiagnosisExtractor(BaseExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) @@ -57,18 +58,18 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val linkedCodes = Set("C6") + val linkedCodes = BaseExtractorCodes(List("C6")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) val expected = Seq[Event[Diagnosis]]( SsrLinkedDiagnosis("Patient_02", "10000123_30000546_200_2019", "C68", makeTS(2019, 8, 11)), - SsrLinkedDiagnosis("Patient_02", "10000123_30000546_300_2019", "C66", makeTS(2019, 8, 11))//, + SsrLinkedDiagnosis("Patient_02", "10000123_30000546_300_2019", "C66", makeTS(2019, 8, 11)) //, //SsrMainDiagnosis("Patient_01", "10000123_30000801_100_2019", "C55", makeTS(2019, 10, 20)) ).toDS // When - val result = SsrLinkedDiagnosisExtractor.extract(sources, linkedCodes) + val result = SsrLinkedDiagnosisExtractor(linkedCodes).extract(sources) // Then assertDSs(result, expected) @@ -79,7 +80,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val associatedDiagnosis = Set("C6") + val associatedDiagnosis = BaseExtractorCodes(List("C6")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) @@ -89,7 +90,7 @@ class SsrDiagnosesSuite extends SharedContext { ).toDS // When - val 
result = SsrAssociatedDiagnosisExtractor.extract(sources, associatedDiagnosis) + val result = SsrAssociatedDiagnosisExtractor(associatedDiagnosis).extract(sources) // Then assertDSs(result, expected) @@ -100,7 +101,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val cim10Codes = Set("Z100") + val cim10Codes = BaseExtractorCodes(List("Z100")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[Diagnosis]]( SsrTakingOverPurpose("Patient_02", "10000123_30000546_300_2019", "Z100", makeTS(2019, 8, 11)) @@ -108,7 +109,7 @@ class SsrDiagnosesSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrTakingOverPurposeExtractor.extract(input, cim10Codes) + val result = SsrTakingOverPurposeExtractor(cim10Codes).extract(input) // Then assertDSs(result, expected) @@ -128,7 +129,7 @@ class SsrDiagnosesSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrTakingOverPurposeExtractor.extract(input, Set.empty) + val result = SsrTakingOverPurposeExtractor(BaseExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala index 21d51f13..d5c466e5 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala @@ -123,7 +123,7 @@ class DrugsExtractorSuite extends SharedContext { val drugConf = DrugConfig(Cip13Level, List.empty) // When - val result: Dataset[Event[Drug]] = new DrugExtractor(drugConf).extract(source, Set.empty) + val result: Dataset[Event[Drug]] = DrugExtractor(drugConf).extract(source) // Then assertDSs(result, expected) @@ -225,7 +225,7 @@ class DrugsExtractorSuite extends 
SharedContext { val drugConf = DrugConfig(TherapeuticLevel, List(Antidepresseurs)) // When - val result = new DrugExtractor(drugConf).extract(source, Set.empty) + val result = DrugExtractor(drugConf).extract(source) // Then assertDSs(result, expected) @@ -295,7 +295,7 @@ class DrugsExtractorSuite extends SharedContext { val drugConf = DrugConfig(TherapeuticLevel, List(Neuroleptiques)) // When - val result = new DrugExtractor(drugConf).extract(source, Set.empty) + val result = DrugExtractor(drugConf).extract(source) // Then assertDSs(result, expected) @@ -364,7 +364,7 @@ class DrugsExtractorSuite extends SharedContext { val drugConf = DrugConfig(TherapeuticLevel, List(Hypnotiques)) // When - val result = new DrugExtractor(drugConf).extract(source, Set.empty) + val result = DrugExtractor(drugConf).extract(source) // Then assertDSs(result, expected) @@ -428,7 +428,7 @@ class DrugsExtractorSuite extends SharedContext { val drugConf = DrugConfig(TherapeuticLevel, List(Antihypertenseurs)) // When - val result = new DrugExtractor(drugConf).extract(source, Set.empty) + val result = DrugExtractor(drugConf).extract(source) // Then assertDSs(result, expected) @@ -553,7 +553,7 @@ class DrugsExtractorSuite extends SharedContext { val drugConfigNeuroleptiques: DrugClassConfig = Neuroleptiques val drugConf = DrugConfig(TherapeuticLevel, List(drugConfigAntidepresseurs, drugConfigNeuroleptiques)) // When - val result = new DrugExtractor(drugConf).extract(source, Set.empty) + val result = new DrugExtractor(drugConf).extract(source) // Then assertDSs(result, expected) } @@ -677,7 +677,7 @@ class DrugsExtractorSuite extends SharedContext { val drugConfigNeuroleptiques: DrugClassConfig = Neuroleptiques val drugConf = DrugConfig(PharmacologicalLevel, List(drugConfigAntidepresseurs, drugConfigNeuroleptiques)) // When - val result = new DrugExtractor(drugConf).extract(source, Set.empty) + val result = DrugExtractor(drugConf).extract(source) // Then assertDSs(result, expected) @@ -799,7 
+799,7 @@ class DrugsExtractorSuite extends SharedContext { val drugConfigNeuroleptiques: DrugClassConfig = Neuroleptiques val drugConf = DrugConfig(MoleculeCombinationLevel, List(drugConfigAntidepresseurs, drugConfigNeuroleptiques)) // When - val result = new DrugExtractor(drugConf).extract(source, Set("SHIT")) + val result = DrugExtractor(drugConf).extract(source) // Then assertDSs(result, expected) @@ -826,7 +826,7 @@ class DrugsExtractorSuite extends SharedContext { val drugConf = DrugConfig(Cip13Level, List.empty) // When - val result = new DrugExtractor(drugConf).extractGroupId(r) + val result = DrugExtractor(drugConf).extractGroupId(r) // Then assert(result == expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSourceSuite.scala index 799a9678..cf5db8e8 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSourceSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSourceSuite.scala @@ -1,9 +1,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.had -import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.util.functions.makeTS import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.util.functions.makeTS class HadSourceSuite extends SharedContext with HadSource { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStayExtractorSuite.scala index 336f10ba..aa19bfef 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStayExtractorSuite.scala @@ -1,10 +1,10 @@ 
package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, HadHospitalStay, HospitalStay} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS -import org.apache.spark.sql.Dataset class HadHospitalStayExtractorSuite extends SharedContext { @@ -23,7 +23,7 @@ class HadHospitalStayExtractorSuite extends SharedContext { ).toDS() //When - val result: Dataset[Event[HospitalStay]] = HadHospitalStaysExtractor.extract(sources, Set.empty) + val result: Dataset[Event[HospitalStay]] = HadHospitalStaysExtractor.extract(sources) //Then assertDSs(expected, result) @@ -44,7 +44,7 @@ class HadHospitalStayExtractorSuite extends SharedContext { ).toDS() //When - val result: Dataset[Event[HospitalStay]] = HadHospitalStaysExtractor.extract(sources, Set("Test")) + val result: Dataset[Event[HospitalStay]] = HadHospitalStaysExtractor.extract(sources) //Then assertDSs(expected, result) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala index 508fa0b8..f97fece3 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala @@ -28,7 +28,7 @@ class McoHospitalStayExtractorSuite extends SharedContext { ).toDS() //When - val result: Dataset[Event[HospitalStay]] = McoHospitalStaysExtractor.extract(sources, Set.empty) + val result: Dataset[Event[HospitalStay]] = McoHospitalStaysExtractor.extract(sources) //Then assertDSs(expected, result) @@ -52,13 +52,13 @@ class McoHospitalStayExtractorSuite extends SharedContext { ).toDS() //When - val result: 
Dataset[Event[HospitalStay]] = McoHospitalStaysExtractor.extract(sources, Set("Test")) + val result: Dataset[Event[HospitalStay]] = McoHospitalStaysExtractor.extract(sources) //Then assertDSs(expected, result) } - "extract" should "calculate correct weight from mco sources" in { + "extractWeight" should "calculate correct weight from mco sources" in { //Given val sqlCtx = sqlContext import sqlCtx.implicits._ @@ -71,18 +71,10 @@ class McoHospitalStayExtractorSuite extends SharedContext { ("Patient_02", "10000123", "20000987", "2007", makeTS(2007, 5, 1), makeTS(2007, 5, 10), null, null) ).toDF("NUM_ENQ", "ETA_NUM", "RSA_NUM", "SOR_ANN", "EXE_SOI_DTD", "EXE_SOI_DTF", "MCO_B__ENT_MOD", "MCO_B__ENT_PRV") - val sources = Sources(mco = Some(df)) - - val expected: Dataset[Event[HospitalStay]] = Seq( - McoHospitalStay("Patient_02", "10000123_20000123_2007", 8.5D, makeTS(2007, 1, 1), makeTS(2007, 1, 10)), - McoHospitalStay("Patient_02", "10000123_20000345_2007", 8.5D, makeTS(2007, 2, 1), makeTS(2007, 2, 10)), - McoHospitalStay("Patient_02", "10000123_20000546_2007", 8.8D, makeTS(2007, 3, 1), makeTS(2007, 3, 10)), - McoHospitalStay("Patient_02", "10000123_20000852_2007", 8.0D, makeTS(2007, 4, 1), makeTS(2007, 4, 10)), - McoHospitalStay("Patient_02", "10000123_20000987_2007", -1.0D, makeTS(2007, 5, 1), makeTS(2007, 5, 10)) - ).toDS() + val expected = Seq(8.5D, 8.5D, 8.8D, 8.0D, -1.0D).toDS() //When - val result: Dataset[Event[HospitalStay]] = McoHospitalStaysExtractor.extract(sources, Set.empty[String]) + val result = df.map(r => McoHospitalStaysExtractor.extractWeight(r)) //Then assertDSs(expected, result) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala index 89f8ad7d..765f9593 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala 
+++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala @@ -13,17 +13,27 @@ class McoceEmergenciesExtractorSuite extends SharedContext { import sqlCtx.implicits._ val df = Seq( - ("20041", "830100525", "00030885", "2012", makeTS(2012, 4, 21), makeTS(2012, 4, 21), "ATU"), - ("20041", "830100525", "00032716", "2012", makeTS(2012, 4, 28), makeTS(2012, 4, 29), "ATU"), - ("20041", "830100525", "00032738", "2012", makeTS(2012, 4, 29), makeTS(2012, 4, 29), "ATU"), - ("20041", "830100525", "00032038", "2013", makeTS(2013, 4, 29), makeTS(2013, 4, 29), "FTN"), - ("200410", "190000059", "00044158", null, makeTS(2010, 3, 5), makeTS(2010, 3, 5), null), - ("200410", "190000059", "00027825", null, makeTS(2011, 5, 13), makeTS(2011, 5, 13), null), - ("200410", "190000059", "00020161", null, makeTS(2012, 4, 10), makeTS(2012, 4, 10), null), - ("200410", "190000059", "00022621", null, makeTS(2014, 4, 18), makeTS(2014, 5, 18), null), - ("2004838055", "680000395", "00018597", "2010", makeTS(2010, 7, 11), makeTS(2010, 7, 11), "ATU F"), - ("2006191920", "680000395", "00009656", "2013", makeTS(2013, 9, 24), makeTS(2013, 9, 24), "ATU N") - ).toDF("NUM_ENQ", "ETA_NUM", "SEQ_NUM", "MCO_FBSTC__SOR_ANN", "EXE_SOI_DTD", "EXE_SOI_DTF", "MCO_FBSTC__ACT_COD") + ("20041", "830100525", "00030885", "2012", makeTS(2012, 4, 21), makeTS(2012, 4, 21), "ATU", 2012), + ("20041", "830100525", "00032716", "2012", makeTS(2012, 4, 28), makeTS(2012, 4, 29), "ATU", 2012), + ("20041", "830100525", "00032738", "2012", makeTS(2012, 4, 29), makeTS(2012, 4, 29), "ATU", 2012), + ("20041", "830100525", "00032038", "2013", makeTS(2013, 4, 29), makeTS(2013, 4, 29), "FTN", 2013), + ("200410", "190000059", "00044158", null, makeTS(2010, 3, 5), makeTS(2010, 3, 5), null, 2010), + ("200410", "190000059", "00027825", null, makeTS(2011, 5, 13), makeTS(2011, 5, 13), null, 2011), + ("200410", "190000059", "00020161", null, makeTS(2012, 4, 10), makeTS(2012, 4, 10), null, 
2012), + ("200410", "190000059", "00022621", null, makeTS(2014, 4, 18), makeTS(2014, 5, 18), null, 2014), + ("2004838055", "680000395", "00018597", "2010", makeTS(2010, 7, 11), makeTS(2010, 7, 11), "ATU F", 2010), + ("2006191920", "680000395", "00009656", "2013", makeTS(2013, 9, 24), makeTS(2013, 9, 24), "ATU N", 2013) + ) + .toDF( + "NUM_ENQ", + "ETA_NUM", + "SEQ_NUM", + "MCO_FBSTC__SOR_ANN", + "EXE_SOI_DTD", + "EXE_SOI_DTF", + "MCO_FBSTC__ACT_COD", + "year" + ) val sources = Sources(mcoCe = Some(df)) @@ -35,9 +45,9 @@ class McoceEmergenciesExtractorSuite extends SharedContext { McoceEmergency("2006191920", "680000395_00009656_2013", makeTS(2013, 9, 24), makeTS(2013, 9, 24)) ).toDS() - val res = McoceEmergenciesExtractor.extract(sources, Set.empty[String]) + val result = McoceEmergenciesExtractor.extract(sources) - assertDSs(expected, res) + assertDSs(expected, result) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala index 6f8f52e4..e7875f3a 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala @@ -1,10 +1,10 @@ package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, HospitalStay, SsrHospitalStay} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS -import org.apache.spark.sql.Dataset class SSrHospitalStayExtractorSuite extends SharedContext { @@ -22,7 +22,7 @@ class SSrHospitalStayExtractorSuite extends SharedContext { ).toDS() //When - val result: Dataset[Event[HospitalStay]] = 
SsrHospitalStaysExtractor.extract(sources, Set.empty) + val result: Dataset[Event[HospitalStay]] = SsrHospitalStaysExtractor.extract(sources) //Then assertDSs(expected, result) @@ -42,7 +42,7 @@ class SSrHospitalStayExtractorSuite extends SharedContext { ).toDS() //When - val result: Dataset[Event[HospitalStay]] = SsrHospitalStaysExtractor.extract(sources, Set("Test")) + val result: Dataset[Event[HospitalStay]] = SsrHospitalStaysExtractor.extract(sources) //Then assertDSs(expected, result) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSourceSuite.scala index 45c8e95d..2580139c 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSourceSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSourceSuite.scala @@ -19,7 +19,7 @@ class McoSourceSuite extends SharedContext with McoSource { Some(makeTS(2011, 12, 1)), Some(makeTS(2011, 12, 12)), Some("01122011"), Some("12122011")), ("HasCancer1", Some("C679"), Some("C691"), Some("C643"), Some(0), Some(12), Some(2011), 11, Some(makeTS(2011, 12, 1)), Some(makeTS(2011, 12, 12)), Some("01122011"), Some("12122011")), - ("HasCancer2", Some("C669"), Some("C672"), Some("C643"), Some(0),Some(12), Some(2011), 11, + ("HasCancer2", Some("C669"), Some("C672"), Some("C643"), Some(0), Some(12), Some(2011), 11, None, Some(makeTS(2011, 12, 12)), None, Some("12122011")), ("HasCancer3", Some("C669"), Some("C672"), Some("C643"), Some(0), Some(12), Some(2011), 11, None, None, None, None), @@ -27,7 +27,7 @@ class McoSourceSuite extends SharedContext with McoSource { None, Some(makeTS(2011, 12, 12)), None, Some("12122011")), ("HasCancer5", Some("C679"), Some("B672"), Some("C673"), Some(0), Some(1), Some(2010), 31, Some(makeTS(2011, 12, 1)), Some(makeTS(2011, 12, 12)), Some("01122011"), Some("12122011")), - ("MustBeDropped1", None, None, None, Some(0), Some(1), Some(2010), 31, + 
("MustBeDropped1", None, None, None, Some(0), Some(1), Some(2010), 31, Some(makeTS(2011, 12, 1)), Some(makeTS(2011, 12, 12)), Some("01122011"), Some("12122011")), ("MustBeDropped2", None, Some("7"), None, Some(0), Some(1), Some(2010), 31, Some(makeTS(2011, 12, 1)), Some(makeTS(2011, 12, 12)), Some("01122011"), Some("12122011")) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchasesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchasesSuite.scala index 1d26eb93..c5087123 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchasesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchasesSuite.scala @@ -146,7 +146,7 @@ class DcirMoleculePurchasesSuite extends SharedContext { //when val extractor = new DcirMoleculePurchases(config) - val result = extractor.getInput(sources).filter(extractor.isInStudy(config.drugClasses.toSet) _).distinct() + val result = extractor.getInput(sources).filter(extractor.isInStudy _).distinct() //then assertDFs(result, expected) @@ -217,7 +217,7 @@ class DcirMoleculePurchasesSuite extends SharedContext { val expected = Seq(Molecule("patient", "SULFONYLUREA", 2700.0, makeTS(2006, 1, 15))).toDS() // When - val result = new DcirMoleculePurchases(config).extract(sources, config.drugClasses.toSet) + val result = new DcirMoleculePurchases(config).extract(sources) // Then assertDSs(result, expected) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesSuite.scala index 6f2f012d..ab8ab5fb 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesSuite.scala @@ -29,12 +29,12 @@ class 
MoleculePurchasesSuite extends SharedContext { irPha = Some(irPha), dosages = Some(dosages) ) - val expected = new DcirMoleculePurchases(config).extract(sources, config.drugClasses.toSet).toDF + val expected = new DcirMoleculePurchases(config).extract(sources) // When - val result = new MoleculePurchases(config).extract(sources).toDF + val result = new MoleculePurchases(config).extract(sources) // Then - assertDFs(result, expected) + assertDSs(result, expected) } } \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala index 6e37d613..976c202d 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala @@ -4,28 +4,12 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.events.{DcirNgapAct, Event, NgapAct} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS class DcirNgapActsExtractorSuite extends SharedContext { - object ngapClassKeyLetterCoefficient extends NgapActClassConfig { - override val ngapPrsNatRefs: Seq[String] = Seq() - val ngapKeyLetters: Seq[String] = Seq("C") - val ngapCoefficients: Seq[String] = Seq( - "0.42" - ) - } - - object ngapPrsNatRef extends NgapActClassConfig { - override val ngapPrsNatRefs: Seq[String] = Seq("1111") - val ngapKeyLetters: Seq[String] = Seq("D") - val ngapCoefficients: Seq[String] = Seq( - "0.45" - ) - } - "extract" should "extract ngap acts events from raw data with a ngapClass based on key letter B2 and coefficient" in { val sqlCtx = sqlContext @@ 
-38,18 +22,23 @@ class DcirNgapActsExtractorSuite extends SharedContext { val source = new Sources(dcir = Some(dcir), irNat = Some(irNat)) val expected = Seq[Event[NgapAct]]( - DcirNgapAct("Patient_01", "unknown_source", "1111_C_0.42", 1.0, makeTS(2006, 2, 1)), - DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 1.0, makeTS(2006, 1, 15)), - DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 1.0, makeTS(2006, 1, 30)) + DcirNgapAct("Patient_01", "unknown_source", "1111_C_0.42", 0.0, makeTS(2006, 2, 1)), + DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 0.0, makeTS(2006, 1, 15)), + DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 0.0, makeTS(2006, 1, 30)) ).toDS val ngapConf = NgapActConfig( actsCategories = List( - ngapClassKeyLetterCoefficient + new NgapWithNatClassConfig( + ngapKeyLetters = Seq("D"), + ngapCoefficients = Seq("0.45"), + ngapPrsNatRefs = Seq("1111") + ) ) ) + // When - val result = new DcirNgapActExtractor(ngapConf).extract(source, Set.empty) + val result = DcirNgapActExtractor(ngapConf).extract(source) // Then assertDSs(result, expected) @@ -68,18 +57,22 @@ class DcirNgapActsExtractorSuite extends SharedContext { val source = new Sources(dcir = Some(dcir), irNat = Some(irNat)) val expected = Seq[Event[NgapAct]]( - DcirNgapAct("Patient_01", "unknown_source", "1111_C_0.42", 1.0, makeTS(2006, 2, 1)), - DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 1.0, makeTS(2006, 1, 15)), - DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 1.0, makeTS(2006, 1, 30)) + DcirNgapAct("Patient_01", "unknown_source", "1111_C_0.42", 0.0, makeTS(2006, 2, 1)), + DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 0.0, makeTS(2006, 1, 15)), + DcirNgapAct("Patient_01", "liberal", "1111_C_0.42", 0.0, makeTS(2006, 1, 30)) ).toDS val ngapConf = NgapActConfig( actsCategories = List( - ngapPrsNatRef + new NgapWithNatClassConfig( + ngapKeyLetters = Seq("D"), + ngapCoefficients = Seq("0.45"), + ngapPrsNatRefs = Seq("1111") + ) ) ) // When - val result = new 
DcirNgapActExtractor(ngapConf).extract(source, Set.empty) + val result = DcirNgapActExtractor(ngapConf).extract(source) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala index ccfb0f30..624ebb38 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala @@ -2,22 +2,12 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCeFbstcNgapAct, McoCeFcstcNgapAct, NgapAct} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS class McoNgapActsExtractorSuite extends SharedContext { - object ngapClassKeyLetterCoefficient extends NgapActClassConfig { - val ngapKeyLetters: Seq[String] = Seq("ABG") - val ngapCoefficients: Seq[String] = Seq("42.0") - } - - object ngapKeyLetter extends NgapActClassConfig { - val ngapKeyLetters: Seq[String] = Seq("ABC") - val ngapCoefficients: Seq[String] = Seq.empty - } - "extract" should "extract ngap acts events from raw data with a ngapClass based on key letter B2 and coefficient" in { val sqlCtx = sqlContext @@ -33,11 +23,14 @@ class McoNgapActsExtractorSuite extends SharedContext { val ngapConf = NgapActConfig( actsCategories = List( - ngapClassKeyLetterCoefficient + NgapActClassConfig( + ngapKeyLetters = Seq("ABG"), + ngapCoefficients = Seq("42.0") + ) ) ) // When - val result = new McoCeFbstcNgapActExtractor(ngapConf).extract(source, Set.empty) + val result = McoCeFbstcNgapActExtractor(ngapConf).extract(source) // Then assertDSs(result, 
expected) } @@ -57,11 +50,14 @@ class McoNgapActsExtractorSuite extends SharedContext { val ngapConf = NgapActConfig( actsCategories = List( - ngapKeyLetter + NgapActClassConfig( + ngapKeyLetters = Seq("ABC"), + ngapCoefficients = Seq.empty + ) ) ) // When - val result = new McoCeFbstcNgapActExtractor(ngapConf).extract(source, Set.empty) + val result = McoCeFbstcNgapActExtractor(ngapConf).extract(source) // Then assertDSs(result, expected) } @@ -84,7 +80,7 @@ class McoNgapActsExtractorSuite extends SharedContext { actsCategories = List.empty ) // When - val result = new McoCeFbstcNgapActExtractor(ngapConf).extract(source, Set.empty) + val result = McoCeFbstcNgapActExtractor(ngapConf).extract(source) // Then assertDSs(result, expected) @@ -108,7 +104,7 @@ class McoNgapActsExtractorSuite extends SharedContext { actsCategories = List.empty ) // When - val result = new McoCeFcstcNgapActExtractor(ngapConf).extract(source, Set.empty) + val result = McoCeFcstcNgapActExtractor(ngapConf).extract(source) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatientsSuite.scala index d949a0f5..2af3bf26 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatientsSuite.scala @@ -1,10 +1,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import java.sql.Timestamp - -import fr.polytechnique.cmap.cnam.SharedContext import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ +import fr.polytechnique.cmap.cnam.SharedContext class HadPatientsSuite extends SharedContext { @@ -88,7 +87,7 @@ class HadPatientsSuite extends SharedContext { val expected: DataFrame = Seq.empty[ (String, Timestamp) - ].toDF("patientID", "deathDate") + ].toDF("patientID", "deathDate") // Then 
assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatientsSuite.scala index a7a60212..9a501dc0 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatientsSuite.scala @@ -5,8 +5,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import org.apache.spark.sql.functions.lit import org.apache.spark.sql.types.TimestampType import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.patients.McocePatients.McocePatientsImplicit import fr.polytechnique.cmap.cnam.etl.implicits +import fr.polytechnique.cmap.cnam.etl.extractors.patients.McocePatients.McocePatientsImplicit import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.study.fall.config.FallConfig import fr.polytechnique.cmap.cnam.util.functions.makeTS diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsSuite.scala index 236855a3..32e7dc65 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsSuite.scala @@ -4,7 +4,6 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import org.apache.spark.sql.{Column, DataFrame, Dataset} import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.Event import fr.polytechnique.cmap.cnam.etl.patients.Patient import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ @@ -93,7 +92,8 @@ class PatientsSuite extends SharedContext { mco = Some(mcoDf), irBen = Some(irBenDf), mcoCe = Some(mcoceDf), - 
ssr = Some(ssrDf)) + ssr = Some(ssrDf) + ) // When val result = new Patients(config).extract(sources) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala index b5ea67d5..e3097194 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala @@ -6,11 +6,10 @@ import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCeFbstcMedicalPractitionerClaim, McoCeFcstcMedicalPractitionerClaim, MedicalPractitionerClaim, NonMedicalPractitionerClaim, PractitionerClaimSpeciality} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS -import scala.collection.immutable.Stream.Empty - class PractitionerClaimSpecialityExtractorSuite extends SharedContext { "extract" should "extract health care related services provided by medical practitioner raw data" in { @@ -19,7 +18,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val medicalSpeCodes = List("42") + val medicalSpeCodes = BaseExtractorCodes(List("42")) val input = spark.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") val sources = Sources(dcir = Some(input)) @@ -31,7 +30,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = MedicalPractitionerClaimExtractor.extract(sources, medicalSpeCodes.toSet) + val result = 
MedicalPractitionerClaimExtractor(medicalSpeCodes).extract(sources) // Then assertDSs(result, expected) @@ -44,7 +43,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val nonMedicalSpeCodes = List("42") + val nonMedicalSpeCodes = BaseExtractorCodes(List("42")) val input = spark.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") val sources = Sources(dcir = Some(input)) @@ -58,7 +57,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = NonMedicalPractitionerClaimExtractor.extract(sources, nonMedicalSpeCodes.toSet) + val result = NonMedicalPractitionerClaimExtractor(nonMedicalSpeCodes).extract(sources) // Then assertDSs(result, expected) @@ -75,7 +74,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { val expected = "A10000001" // When - val result = NonMedicalPractitionerClaimExtractor.extractGroupId(row) + val result = NonMedicalPractitionerClaimExtractor(BaseExtractorCodes.empty).extractGroupId(row) // Then assert(result == expected) @@ -88,7 +87,6 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val medicalSpeCodes = List() val input = spark.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") val sources = Sources(dcir = Some(input)) @@ -100,7 +98,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = MedicalPractitionerClaimExtractor.extract(sources, medicalSpeCodes.toSet) + val result = MedicalPractitionerClaimExtractor(BaseExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) @@ -112,7 +110,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val medicalSpeCodes = List("1") + val medicalSpeCodes = BaseExtractorCodes(List("1")) val input = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") val 
sources = Sources(mcoCe = Some(input)) @@ -122,7 +120,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = McoCeFbstcSpecialtyExtractor.extract(sources, medicalSpeCodes.toSet) + val result = McoCeFbstcSpecialtyExtractor(medicalSpeCodes).extract(sources) // Then assertDSs(result, expected) @@ -134,7 +132,6 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val medicalSpeCodes = List.empty val input = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") val sources = Sources(mcoCe = Some(input)) @@ -145,7 +142,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = McoCeFbstcSpecialtyExtractor.extract(sources, medicalSpeCodes.toSet) + val result = McoCeFbstcSpecialtyExtractor(BaseExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) @@ -169,7 +166,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = McoCeFcstcSpecialtyExtractor.extract(sources, medicalSpeCodes.toSet) + val result = McoCeFcstcSpecialtyExtractor(BaseExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala index 5a0a16a6..2ddfecab 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala @@ -1,9 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.ssr +import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.util.functions.makeTS -import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.functions._ class SsrSourceSuite extends SharedContext with SsrSource { @@ -11,21 +10,21 @@ 
class SsrSourceSuite extends SharedContext with SsrSource { val fakeSsrData = { - val sqlCtx = sqlContext - import sqlCtx.implicits._ - - Seq( - ("Patient1", Some("C669"), Some("C672"), Some("C643"), Some("12122011"), Some(2011), Some(12)), - ("Patient1", Some("C679"), Some("C691"), Some("C643"), Some("01122011"), Some(2011), Some(12)), - ("Patient1", Some("C679"), Some("C691"), Some("C643"), Some("15012012"), Some(2012), Some(1)), - ("Patient2", Some("C669"), Some("C672"), Some("C643"), None, Some(2011), Some(11)), - ("Patient3", Some("C679"), Some("B672"), Some("C673"), None, Some(2011), Some(5)), - ("MustBeDropped1", None, None, None, Some("31122011"), Some(2011), Some(12)) - ).toDF( - "NUM_ENQ", "SSR_B__MOR_PRP", "SSR_B__ETL_AFF", "SSR_D__DGN_COD", - "ENT_DAT", "ANN_LUN_1S", "MOI_LUN_1S" - ) - } + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + Seq( + ("Patient1", Some("C669"), Some("C672"), Some("C643"), Some("12122011"), Some(2011), Some(12)), + ("Patient1", Some("C679"), Some("C691"), Some("C643"), Some("01122011"), Some(2011), Some(12)), + ("Patient1", Some("C679"), Some("C691"), Some("C643"), Some("15012012"), Some(2012), Some(1)), + ("Patient2", Some("C669"), Some("C672"), Some("C643"), None, Some(2011), Some(11)), + ("Patient3", Some("C679"), Some("B672"), Some("C673"), None, Some(2011), Some(5)), + ("MustBeDropped1", None, None, None, Some("31122011"), Some(2011), Some(12)) + ).toDF( + "NUM_ENQ", "SSR_B__MOR_PRP", "SSR_B__ETL_AFF", "SSR_D__DGN_COD", + "ENT_DAT", "ANN_LUN_1S", "MOI_LUN_1S" + ) + } val sqlCtx = sqlContext import sqlCtx.implicits._ diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOveReasonSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOveReasonSuite.scala index fbcc5ecd..8cadd95a 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOveReasonSuite.scala +++ 
b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOveReasonSuite.scala @@ -1,18 +1,19 @@ package fr.polytechnique.cmap.cnam.etl.extractors.takeOverReasons import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.events.{Event, HadAssociatedTakeOver, HadMainTakeOver, MedicalTakeOverReason} +import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ class HadTakeOveReasonSuite extends SharedContext { - + "extract" should "return a DataSet of HadMainTakeOveReasons" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val takeOverReasonCodes = Set("1") + val takeOverReasonCodes = BaseExtractorCodes(List("1")) val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") val expected = Seq[Event[MedicalTakeOverReason]]( HadMainTakeOver("patient01", "10000123_30000123_2019", "1", makeTS(2019, 11, 21)) @@ -20,7 +21,7 @@ class HadTakeOveReasonSuite extends SharedContext { val input = Sources(had = Some(had)) // When - val result = HadMainTakeOverExtractor.extract(input, takeOverReasonCodes) + val result = HadMainTakeOverExtractor(takeOverReasonCodes).extract(input) // Then assertDSs(result, expected) @@ -40,7 +41,7 @@ class HadTakeOveReasonSuite extends SharedContext { val input = Sources(had = Some(had)) // When - val result = HadMainTakeOverExtractor.extract(input, Set.empty) + val result = HadMainTakeOverExtractor(BaseExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) @@ -60,7 +61,7 @@ class HadTakeOveReasonSuite extends SharedContext { val input = Sources(had = Some(had)) // When - val result = HadAssociatedTakeOverExtractor.extract(input, Set.empty) + val result = HadAssociatedTakeOverExtractor(BaseExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git 
a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformerSuite.scala index 1c783247..289434f7 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformerSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformerSuite.scala @@ -24,9 +24,9 @@ class FracturesTransformerSuite extends SharedContext { val testConf = defaultConf.copy(outcomes = FracturesConfig(fallFrame = 3.months)) val acts: Dataset[Event[MedicalAct]] = Seq( //pubic ambulatory acts - McoCEAct("georgette", DcirAct.groupID.PublicAmbulatory, "MZMP007", 1.0, makeTS(2010, 2, 6)), - McoCEAct("georgettebis", DcirAct.groupID.PublicAmbulatory, "MZMP007", 1.0, makeTS(2010, 2, 6)), - McoCEAct("george", DcirAct.groupID.PublicAmbulatory, "whatever", 1.0, makeTS(2010, 2, 6)), + McoCeCcamAct("georgette", DcirAct.groupID.PublicAmbulatory, "MZMP007", 1.0, makeTS(2010, 2, 6)), + McoCeCcamAct("georgettebis", DcirAct.groupID.PublicAmbulatory, "MZMP007", 1.0, makeTS(2010, 2, 6)), + McoCeCcamAct("george", DcirAct.groupID.PublicAmbulatory, "whatever", 1.0, makeTS(2010, 2, 6)), DcirAct("john", DcirAct.groupID.PublicAmbulatory, "MZMP007", 1.0, makeTS(2010, 2, 6)), //private ambulatory acts DcirAct("riri", DcirAct.groupID.PrivateAmbulatory, "NBEP002", 1.0, makeTS(2007, 1, 1)), diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFracturesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFracturesSuite.scala index d63514a1..737ef97b 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFracturesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFracturesSuite.scala @@ -3,14 +3,14 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures import 
fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, McoCEAct, McoCIM10Act, Outcome} +import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, McoCeCcamAct, McoCIM10Act, Outcome} import fr.polytechnique.cmap.cnam.util.functions.makeTS class PublicAmbulatoryFracturesSuite extends SharedContext { "isPublicAmbulatory" should "return true for correct events" in { // Given - val event = McoCEAct("georgette", "ACE", "angine", makeTS(2010, 2, 6)) + val event = McoCeCcamAct("georgette", "ACE", "angine", makeTS(2010, 2, 6)) // When val result = PublicAmbulatoryFractures.isPublicAmbulatory(event) @@ -32,7 +32,7 @@ class PublicAmbulatoryFracturesSuite extends SharedContext { "containsNonHospitalizedCcam" should "return true for correct events" in { // Given - val event = McoCEAct("georgette", "ACE", "MZMP007", makeTS(2010, 2, 6)) + val event = McoCeCcamAct("georgette", "ACE", "MZMP007", makeTS(2010, 2, 6)) // When val result = PublicAmbulatoryFractures.containsNonHospitalizedCcam(event) @@ -47,8 +47,8 @@ class PublicAmbulatoryFracturesSuite extends SharedContext { // Given val events = Seq( - McoCEAct("georgette", "ACE", "MZMP007", makeTS(2010, 2, 6)), - McoCEAct("george", "ACE", "whatever", makeTS(2010, 2, 6)), + McoCeCcamAct("georgette", "ACE", "MZMP007", makeTS(2010, 2, 6)), + McoCeCcamAct("george", "ACE", "whatever", makeTS(2010, 2, 6)), DcirAct("john", "ACE", "MZMP007", makeTS(2010, 2, 6)) ).toDS diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/outcomes/NaiveBladderCancerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/outcomes/NaiveBladderCancerSuite.scala index dd1771b0..b9984f17 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/outcomes/NaiveBladderCancerSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/outcomes/NaiveBladderCancerSuite.scala @@ -18,7 +18,7 @@ class NaiveBladderCancerSuite extends SharedContext { 
McoMainDiagnosis("PatientA", "C67", makeTS(2010, 1, 1)), McoLinkedDiagnosis("PatientA", "C67", makeTS(2010, 2, 1)), McoAssociatedDiagnosis("PatientA", "C67", makeTS(2010, 3, 1)), - ImbDiagnosis("PatientA", "C67", makeTS(2010, 4, 1)), + ImbCcamDiagnosis("PatientA", "C67", makeTS(2010, 4, 1)), McoMainDiagnosis("PatientA", "ABC", makeTS(2010, 5, 1)) ).toDS From 4b17f383b29b60ed0b1c88179bd7d9029cd0ac85 Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Wed, 1 Apr 2020 16:19:34 +0200 Subject: [PATCH 27/38] CNAM-452: Add tests. CNAM-452: Add tests. --- .../cmap/cnam/etl/config/ConfigLoader.scala | 2 +- .../etl/extractors/EventRowExtractor.scala | 9 +- .../cmap/cnam/etl/extractors/Extractor.scala | 12 +-- .../cnam/etl/extractors/ExtractorCodes.scala | 26 ----- ...cExtractor.scala => SimpleExtractor.scala} | 13 +-- .../extractors/acts/HadCcamActExtractor.scala | 14 --- .../acts/McoCeCcamActExtractor.scala | 16 --- .../extractors/acts/SsrCeActExtractor.scala | 14 --- .../classifications/GhmExtractor.scala | 15 --- .../etl/extractors/codes/ExtractorCodes.scala | 7 ++ .../codes/SimpleExtractorCodes.scala | 21 ++++ .../diagnoses/HadDiagnosisExtractor.scala | 21 ---- .../diagnoses/McoDiagnosisExtractor.scala | 28 ------ .../etl/extractors/drugs/DrugConfig.scala | 19 ---- .../acts/DcirMedicalActExtractor.scala | 22 +++-- .../events/acts/HadCcamActExtractor.scala | 15 +++ .../acts/McoCcamActExtractor.scala | 11 ++- .../events/acts/McoCeCcamActExtractor.scala | 17 ++++ .../{ => events}/acts/MedicalActsConfig.scala | 2 +- .../{ => events}/acts/SsrActExtractor.scala | 15 +-- .../events/acts/SsrCeActExtractor.scala | 15 +++ .../events/classifications/GhmExtractor.scala | 16 +++ .../diagnoses/DiagnosesConfig.scala | 2 +- .../diagnoses/HadDiagnosisExtractor.scala | 22 +++++ .../diagnoses/ImbCimDiagnosisExtractor.scala | 11 ++- .../diagnoses/McoDiagnosisExtractor.scala | 29 ++++++ .../diagnoses/SsrDiagnosisExtractor.scala | 19 ++-- .../extractors/events/drugs/DrugConfig.scala | 20 ++++ 
.../{ => events}/drugs/DrugExtractor.scala | 2 +- .../classification/DrugClassConfig.scala | 2 +- .../PharmacologicalClassConfig.scala | 2 +- .../families/Antidepresseurs.scala | 4 +- .../families/Antiepileptics.scala | 4 +- .../families/Antihypertenseurs.scala | 4 +- .../classification/families/Cardiac.scala | 4 +- .../classification/families/Hypnotiques.scala | 4 +- .../families/Neuroleptiques.scala | 4 +- .../classification/families/Opioids.scala | 4 +- .../families/ProtonPumpInhibitors.scala | 4 +- .../{ => events}/drugs/level/Cip13Level.scala | 4 +- .../drugs/level/DrugClassificationLevel.scala | 4 +- .../level/MoleculeCombinationLevel.scala | 4 +- .../drugs/level/PharmacologicalLevel.scala | 4 +- .../drugs/level/TherapeuticLevel.scala | 4 +- .../HadHospitalStaysExtractor.scala | 11 ++- .../McoHospitalStaysExtractor.scala | 11 ++- .../McoceEmergenciesExtractor.scala | 24 ++--- .../SsrHospitalStaysExtractor.scala | 11 ++- .../molecules/DcirMoleculePurchases.scala | 2 +- .../molecules/MoleculePurchases.scala | 2 +- .../molecules/MoleculePurchasesConfig.scala | 5 +- .../ngapacts/DcirNgapActExtractor.scala | 6 +- .../ngapacts/McoCeNgapActExtractor.scala | 6 +- .../ngapacts/NgapActClassConfig.scala | 2 +- .../{ => events}/ngapacts/NgapActConfig.scala | 5 +- .../prestations/McoCeSpecialtyExtractor.scala | 15 +-- .../PractitionerClaimSpecialityConfig.scala | 2 +- ...PractitionerClaimSpecialityExtractor.scala | 17 ++-- .../HadTakeOverReasonExtractor.scala | 15 +-- .../{ => sources}/dcir/DcirRowExtractor.scala | 2 +- .../dcir/DcirSimpleExtractor.scala} | 6 +- .../{ => sources}/dcir/DcirSource.scala | 23 +---- .../{ => sources}/had/HadRowExtractor.scala | 2 +- .../had/HadSimpleExtractor.scala} | 6 +- .../{ => sources}/had/HadSource.scala | 2 +- .../{ => sources}/imb/ImbRowExtractor.scala | 4 +- .../imb/ImbSimpleExtractor.scala} | 8 +- .../{ => sources}/imb/ImbSource.scala | 2 +- .../{ => sources}/mco/McoRowExtractor.scala | 2 +- .../mco/McoSimpleExtractor.scala} | 6 +- 
.../{ => sources}/mco/McoSource.scala | 2 +- .../mcoce}/McoCeRowExtractor.scala | 2 +- .../mcoce/McoCeSimpleExtractor.scala} | 6 +- .../mcoce}/McoCeSource.scala | 2 +- .../{ => sources}/ssr/SsrRowExtractor.scala | 4 +- .../ssr/SsrSimpleExtractor.scala} | 6 +- .../{ => sources}/ssr/SsrSource.scala | 2 +- .../ssrce/SsrCeRowExtractor.scala | 2 +- .../ssrce/SsrCeSimpleExtractor.scala} | 7 +- .../{ => sources}/ssrce/SsrCeSource.scala | 2 +- .../cmap/cnam/study/bulk/BulkConfig.scala | 6 +- .../cnam/study/bulk/BulkConfigLoader.scala | 2 +- .../cmap/cnam/study/bulk/BulkMain.scala | 8 +- .../cmap/cnam/study/fall/FallMain.scala | 2 +- .../cnam/study/fall/FallMainExtract.scala | 2 +- .../cnam/study/fall/config/FallConfig.scala | 12 +-- .../study/fall/config/FallConfigLoader.scala | 2 +- .../fall/config/FallDrugClassConfig.scala | 4 +- .../study/fall/extractors/ActsExtractor.scala | 10 +- .../fall/extractors/CardiacExtractor.scala | 6 +- .../study/fall/extractors/ControlDrugs.scala | 6 +- .../fall/extractors/DiagnosisExtractor.scala | 10 +- .../fall/extractors/DrugsExtractor.scala | 2 +- .../fall/extractors/EpilepticsExtractor.scala | 10 +- .../study/fall/extractors/HTAExtractor.scala | 6 +- .../study/fall/extractors/IPPExtractor.scala | 6 +- .../extractors/McoDiagnosisExtractor.scala | 18 ++-- .../fall/extractors/OpioidsExtractor.scala | 6 +- .../pioglitazone/PioglitazoneConfig.scala | 6 +- .../study/pioglitazone/PioglitazoneMain.scala | 4 +- .../pioglitazone/extractors/Diagnoses.scala | 10 +- .../pioglitazone/extractors/MedicalActs.scala | 8 +- .../rosiglitazone/RosiglitazoneConfig.scala | 4 +- .../rosiglitazone/RosiglitazoneMain.scala | 4 +- .../rosiglitazone/extractors/Diagnoses.scala | 10 +- .../extractors/EventRowExtractorSuite.scala | 18 +++- .../PrescriptionExtractorSuite.scala | 46 --------- .../acts/DcirBiologyActsSuite.scala | 22 ++--- .../acts/DcirMedicalActsSuite.scala | 4 +- .../acts/HadMedicalActsSuite.scala | 8 +- .../acts/McoCEMedicalActsSuite.scala | 12 +-- 
.../acts/McoMedicalActsSuite.scala | 8 +- .../acts/MedicalActsConfigSuite.scala | 2 +- .../acts/SsrCEMedicalActsSuite.scala | 12 +-- .../acts/SsrMedicalActsSuite.scala | 12 +-- .../GHMClassificationsSuite.scala | 8 +- .../diagnoses/HadDiagnosesSuite.scala | 10 +- .../diagnoses/ImbDiagnosesSuite.scala | 10 +- .../diagnoses/McoDiagnosesSuite.scala | 10 +- .../diagnoses/SsrDiagnosesSuite.scala | 16 +-- .../drugs/DrugsExtractorSuite.scala | 8 +- .../PharmacologicalClassConfigSuite.scala | 6 +- .../drugs/level/Cip13LevelSuite.scala | 4 +- .../level/DrugClassficationLevelSuite.scala | 2 +- .../level/MoleculeCombinationSuite.scala | 4 +- .../drugs/level/PharmacologicalSuite.scala | 4 +- .../drugs/level/TherapeuticSuite.scala | 4 +- .../HadHospitalStayExtractorSuite.scala | 2 +- .../McoHospitalStayExtractorSuite.scala | 2 +- .../McoceEmergenciesExtractorSuite.scala | 2 +- .../SSrHospitalStayExtractorSuite.scala | 2 +- .../DcirMoleculePurchasesSuite.scala | 2 +- .../molecules/MoleculePurchasesSuite.scala | 2 +- .../ngapacts/DcirNgapActsExtractorSuite.scala | 2 +- .../ngapacts/McoNgapActsExtractorSuite.scala | 2 +- ...itionerClaimSpecialityExtractorSuite.scala | 18 ++-- .../HadTakeOveReasonSuite.scala | 10 +- .../sources/dcir/DcirRowExtractorSuite.scala | 97 +++++++++++++++++++ .../sources/had/HadRowExtractorSuite.scala | 72 ++++++++++++++ .../{ => sources}/had/HadSourceSuite.scala | 2 +- .../sources/mco/McoRowExtractorSuite.scala | 72 ++++++++++++++ .../{ => sources}/mco/McoSourceSuite.scala | 2 +- .../mcoce/McoCeRowExtractorSuite.scala | 72 ++++++++++++++ .../sources/ssr/SsrRowExtractorSuite.scala | 73 ++++++++++++++ .../{ => sources}/ssr/SsrSourceSuite.scala | 2 +- .../ssrce/SsrCeRowExtractorSuite.scala | 51 ++++++++++ .../study/fall/config/FallConfigSuite.scala | 2 +- 147 files changed, 1007 insertions(+), 599 deletions(-) delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ExtractorCodes.scala rename 
src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{BasicExtractor.scala => SimpleExtractor.scala} (77%) delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadCcamActExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeCcamActExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GhmExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/codes/ExtractorCodes.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/codes/SimpleExtractorCodes.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosisExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosisExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugConfig.scala rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/DcirMedicalActExtractor.scala (74%) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/HadCcamActExtractor.scala rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/McoCcamActExtractor.scala (64%) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCeCcamActExtractor.scala rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/MedicalActsConfig.scala (95%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/SsrActExtractor.scala (64%) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrCeActExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/classifications/GhmExtractor.scala 
rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/diagnoses/DiagnosesConfig.scala (92%) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/diagnoses/ImbCimDiagnosisExtractor.scala (65%) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/diagnoses/SsrDiagnosisExtractor.scala (50%) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugConfig.scala rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/DrugExtractor.scala (98%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/DrugClassConfig.scala (73%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/PharmacologicalClassConfig.scala (92%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/families/Antidepresseurs.scala (98%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/families/Antiepileptics.scala (98%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/families/Antihypertenseurs.scala (99%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/families/Cardiac.scala (72%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/families/Hypnotiques.scala (97%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/families/Neuroleptiques.scala (98%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/families/Opioids.scala 
(97%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/classification/families/ProtonPumpInhibitors.scala (98%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/Cip13Level.scala (72%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/DrugClassificationLevel.scala (78%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/MoleculeCombinationLevel.scala (72%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/PharmacologicalLevel.scala (81%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/TherapeuticLevel.scala (78%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/hospitalstays/HadHospitalStaysExtractor.scala (53%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/hospitalstays/McoHospitalStaysExtractor.scala (72%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/hospitalstays/McoceEmergenciesExtractor.scala (55%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/hospitalstays/SsrHospitalStaysExtractor.scala (61%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/molecules/DcirMoleculePurchases.scala (98%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/molecules/MoleculePurchases.scala (83%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/molecules/MoleculePurchasesConfig.scala (79%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/ngapacts/DcirNgapActExtractor.scala (95%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/ngapacts/McoCeNgapActExtractor.scala (94%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/ngapacts/NgapActClassConfig.scala (92%) rename 
src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/ngapacts/NgapActConfig.scala (89%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/prestations/McoCeSpecialtyExtractor.scala (62%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/prestations/PractitionerClaimSpecialityConfig.scala (92%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/prestations/PractitionerClaimSpecialityExtractor.scala (71%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{takeOverReasons => events/takeoverreasons}/HadTakeOverReasonExtractor.scala (50%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/dcir/DcirRowExtractor.scala (97%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{dcir/DcirBasicExtractor.scala => sources/dcir/DcirSimpleExtractor.scala} (57%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/dcir/DcirSource.scala (72%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/had/HadRowExtractor.scala (92%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{had/HadBasicExtractor.scala => sources/had/HadSimpleExtractor.scala} (60%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/had/HadSource.scala (97%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/imb/ImbRowExtractor.scala (93%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{imb/ImbBasicExtractor.scala => sources/imb/ImbSimpleExtractor.scala} (52%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/imb/ImbSource.scala (86%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/mco/McoRowExtractor.scala (95%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{mco/McoBasicExtractor.scala => sources/mco/McoSimpleExtractor.scala} (59%) rename 
src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/mco/McoSource.scala (98%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{mcoCe => sources/mcoce}/McoCeRowExtractor.scala (93%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{mcoCe/McoCeBasicExtractor.scala => sources/mcoce/McoCeSimpleExtractor.scala} (54%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{mcoCe => sources/mcoce}/McoCeSource.scala (95%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/ssr/SsrRowExtractor.scala (91%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ssr/SsrBasicExtractor.scala => sources/ssr/SsrSimpleExtractor.scala} (59%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/ssr/SsrSource.scala (98%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/ssrce/SsrCeRowExtractor.scala (88%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ssrce/SsrCeBasicExtractor.scala => sources/ssrce/SsrCeSimpleExtractor.scala} (57%) rename src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/ssrce/SsrCeSource.scala (87%) delete mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/PrescriptionExtractorSuite.scala rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/DcirBiologyActsSuite.scala (87%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/DcirMedicalActsSuite.scala (94%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/HadMedicalActsSuite.scala (84%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/McoCEMedicalActsSuite.scala (84%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/McoMedicalActsSuite.scala (87%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/MedicalActsConfigSuite.scala 
(83%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/SsrCEMedicalActsSuite.scala (85%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/acts/SsrMedicalActsSuite.scala (87%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/classifications/GHMClassificationsSuite.scala (87%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/diagnoses/HadDiagnosesSuite.scala (87%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/diagnoses/ImbDiagnosesSuite.scala (86%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/diagnoses/McoDiagnosesSuite.scala (89%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/diagnoses/SsrDiagnosesSuite.scala (88%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/DrugsExtractorSuite.scala (98%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/PharmacologicalClassConfigSuite.scala (92%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/Cip13LevelSuite.scala (90%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/DrugClassficationLevelSuite.scala (90%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/MoleculeCombinationSuite.scala (90%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/PharmacologicalSuite.scala (92%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/drugs/level/TherapeuticSuite.scala (92%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/hospitalstays/HadHospitalStayExtractorSuite.scala (96%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/hospitalstays/McoHospitalStayExtractorSuite.scala (98%) rename 
src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/hospitalstays/McoceEmergenciesExtractorSuite.scala (97%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/hospitalstays/SSrHospitalStayExtractorSuite.scala (96%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/molecules/DcirMoleculePurchasesSuite.scala (99%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/molecules/MoleculePurchasesSuite.scala (94%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/ngapacts/DcirNgapActsExtractorSuite.scala (97%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/ngapacts/McoNgapActsExtractorSuite.scala (98%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => events}/prestations/PractitionerClaimSpecialityExtractorSuite.scala (88%) rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{takeOverReasons => events/takeoverreasons}/HadTakeOveReasonSuite.scala (85%) create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractorSuite.scala rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/had/HadSourceSuite.scala (97%) create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractorSuite.scala rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/mco/McoSourceSuite.scala (97%) create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractorSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractorSuite.scala rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/{ => sources}/ssr/SsrSourceSuite.scala (96%) create mode 100644 
src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractorSuite.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/config/ConfigLoader.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/config/ConfigLoader.scala index e80fd271..da78e94f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/config/ConfigLoader.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/config/ConfigLoader.scala @@ -11,7 +11,7 @@ import me.danielpes.spark.datetime.implicits._ import pureconfig._ import pureconfig.configurable.{localDateConfigConvert, localDateTimeConfigConvert} import pureconfig.generic.{CoproductHint, EnumCoproductHint, FieldCoproductHint, ProductHint} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.DrugClassificationLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.DrugClassificationLevel import fr.polytechnique.cmap.cnam.etl.transformers.exposures.ExposurePeriodAdder trait ConfigLoader { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala index 0c8c7ebc..5e0f64fd 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala @@ -7,15 +7,18 @@ import org.apache.spark.sql.Row trait EventRowExtractor { self: ColumnNames => - def usedColumns: List[String] = List.empty def extractPatientId(r: Row): String + def extractStart(r: Row): Timestamp + def extractGroupId(r: Row): String = "NA" - def extractWeight(r: Row): Double = 0.0 + def extractValue(r: Row): String = "NA" - def extractStart(r: Row): Timestamp + def extractWeight(r: Row): Double = 0.0 def extractEnd(r: Row): Option[Timestamp] = None + + def usedColumns: List[String] = List.empty } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala index 427e7a61..ae3a357e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala @@ -5,13 +5,14 @@ package fr.polytechnique.cmap.cnam.etl.extractors import scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.{DataFrame, Dataset, Row} import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.ExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources trait Extractor[EventType <: AnyEvent, +Codes <: ExtractorCodes] extends Serializable { def getCodes: Codes - /** Allows to check if the Row from the Source is considered in the current Study. + /** Allows to check if the Row is considered in the current Study. * * @param row The row itself. * @return A boolean value. @@ -33,10 +34,10 @@ trait Extractor[EventType <: AnyEvent, +Codes <: ExtractorCodes] extends Seriali */ def builder(row: Row): Seq[Event[EventType]] - /** Gets and prepares all the needed columns from the Source. + /** Gets and prepares all the needed columns from the Sources. * * @param sources Source object [[Sources]] that contains all sources. - * @return A dataframe with mco columns. + * @return A dataframe with needed columns. */ def getInput(sources: Sources): DataFrame @@ -46,8 +47,8 @@ trait Extractor[EventType <: AnyEvent, +Codes <: ExtractorCodes] extends Seriali * This method should be considered the unique callable method from a Study perspective. * * @param sources Source object [[Sources]] that contains all sources. - * @param ctag An implicit parameter taken from Eventype type. - * @return A dataset of Events. + * @param ctag An implicit parameter taken from EventType type. + * @return A Dataset of Events of type EventType. 
*/ def extract(sources: Sources)(implicit ctag: TypeTag[EventType]): Dataset[Event[EventType]] = { val input: DataFrame = getInput(sources) @@ -63,5 +64,4 @@ trait Extractor[EventType <: AnyEvent, +Codes <: ExtractorCodes] extends Seriali } }.flatMap(builder).distinct() } - } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ExtractorCodes.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ExtractorCodes.scala deleted file mode 100644 index 42c59ee3..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ExtractorCodes.scala +++ /dev/null @@ -1,26 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors - -import scala.collection.immutable.HashSet - -trait ExtractorCodes extends Serializable { - def isEmpty: Boolean -} - -class BaseExtractorCodes(val codes: List[String]) extends ExtractorCodes { - val internalCodes: HashSet[String] = codes.to[HashSet] - - override def isEmpty: Boolean = internalCodes.isEmpty - - def exists(p: String => Boolean): Boolean = internalCodes.exists(p) - - def contains(code: String): Boolean = internalCodes.contains(code) -} - -object BaseExtractorCodes { - def empty = new BaseExtractorCodes(List.empty) - - def apply(codes: List[String]): BaseExtractorCodes = new BaseExtractorCodes(codes) - -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/BasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala similarity index 77% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/BasicExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala index 393b15b1..5dbd6378 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/BasicExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala @@ -4,8 +4,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors import org.apache.spark.sql.Row import 
fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes -trait BasicExtractor[EventType <: AnyEvent] extends Extractor[EventType, BaseExtractorCodes] { +trait SimpleExtractor[EventType <: AnyEvent] extends Extractor[EventType, SimpleExtractorCodes] { self: EventRowExtractor => def columnName: String @@ -25,25 +26,25 @@ trait BasicExtractor[EventType <: AnyEvent] extends Extractor[EventType, BaseExt Seq(eventBuilder[EventType](patientId, groupId, value, weight, eventDate, endDate)) } - def extractValue(row: Row): String = row.getAs[String](columnName) + override def extractValue(row: Row): String = row.getAs[String](columnName) } sealed trait InStudyStrategy[EventType <: AnyEvent] { - self: BasicExtractor[EventType]=> + self: SimpleExtractor[EventType]=> override def isInStudy(row: Row): Boolean } trait AlwaysTrueStrategy[EventType <: AnyEvent] extends InStudyStrategy[EventType] { - self: BasicExtractor[EventType] => + self: SimpleExtractor[EventType] => def isInStudy(row: Row): Boolean = true } trait IsInStrategy[EventType <: AnyEvent] extends InStudyStrategy[EventType] { - self: BasicExtractor[EventType] => + self: SimpleExtractor[EventType] => def isInStudy(row: Row): Boolean = getCodes.contains(extractValue(row)) } trait StartsWithStrategy[EventType <: AnyEvent] extends InStudyStrategy[EventType] { - self: BasicExtractor[EventType] => + self: SimpleExtractor[EventType] => def isInStudy(row: Row): Boolean = getCodes.exists(extractValue(row).startsWith) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadCcamActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadCcamActExtractor.scala deleted file mode 100644 index 3aeb2953..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadCcamActExtractor.scala +++ /dev/null @@ -1,14 +0,0 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.acts - 
-import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HadCCAMAct, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.had.HadBasicExtractor - -final case class HadCcamActExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[MedicalAct] - with StartsWithStrategy[MedicalAct] { - override val columnName: String = ColNames.CCAM - override val eventBuilder: EventBuilder = HadCCAMAct - override def getCodes: BaseExtractorCodes = codes -} - - diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeCcamActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeCcamActExtractor.scala deleted file mode 100644 index 4c09da9c..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCeCcamActExtractor.scala +++ /dev/null @@ -1,16 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.acts - - -import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCeCcamAct, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeBasicExtractor - -final case class McoCeCcamActExtractor(codes: BaseExtractorCodes) extends McoCeBasicExtractor[MedicalAct] - with StartsWithStrategy[MedicalAct] { - override val eventBuilder: EventBuilder = McoCeCcamAct - override val columnName: String = ColNames.CamCode - - override def getCodes: BaseExtractorCodes = codes -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala deleted file mode 100644 index 4bd2eea2..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCeActExtractor.scala +++ /dev/null @@ -1,14 +0,0 @@ -package 
fr.polytechnique.cmap.cnam.etl.extractors.acts - -import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, MedicalAct, SsrCEAct} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.ssrce.SsrCeBasicExtractor - -final case class SsrCeActExtractor(codes: BaseExtractorCodes) extends SsrCeBasicExtractor[MedicalAct] - with StartsWithStrategy[MedicalAct] { - override def columnName: String = ColNames.CamCode - - override def eventBuilder: EventBuilder = SsrCEAct - - override def getCodes: BaseExtractorCodes = codes -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GhmExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GhmExtractor.scala deleted file mode 100644 index cd802950..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GhmExtractor.scala +++ /dev/null @@ -1,15 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.classifications - -import fr.polytechnique.cmap.cnam.etl.events.{Classification, EventBuilder, GHMClassification} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoBasicExtractor - -final case class GhmExtractor(codes: BaseExtractorCodes) extends McoBasicExtractor[Classification] - with StartsWithStrategy[Classification] { - override val columnName: String = ColNames.GHM - override val eventBuilder: EventBuilder = GHMClassification - - override def getCodes: BaseExtractorCodes = codes -} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/codes/ExtractorCodes.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/codes/ExtractorCodes.scala new file mode 100644 index 00000000..93a065fb --- /dev/null +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/codes/ExtractorCodes.scala @@ -0,0 +1,7 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.codes + +trait ExtractorCodes extends Serializable { + def isEmpty: Boolean +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/codes/SimpleExtractorCodes.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/codes/SimpleExtractorCodes.scala new file mode 100644 index 00000000..9a931909 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/codes/SimpleExtractorCodes.scala @@ -0,0 +1,21 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.codes + +import scala.collection.immutable.HashSet + +class SimpleExtractorCodes(val codes: List[String]) extends ExtractorCodes { + val internalCodes: HashSet[String] = codes.to[HashSet] + + override def isEmpty: Boolean = internalCodes.isEmpty + + def exists(p: String => Boolean): Boolean = internalCodes.exists(p) + + def contains(code: String): Boolean = internalCodes.contains(code) +} + +object SimpleExtractorCodes { + def empty = new SimpleExtractorCodes(List.empty) + + def apply(codes: List[String]): SimpleExtractorCodes = new SimpleExtractorCodes(codes) +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosisExtractor.scala deleted file mode 100644 index 5a0abbed..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosisExtractor.scala +++ /dev/null @@ -1,21 +0,0 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses - -import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, HadAssociatedDiagnosis, HadMainDiagnosis} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import 
fr.polytechnique.cmap.cnam.etl.extractors.had.HadBasicExtractor - -final case class HadMainDiagnosisExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[Diagnosis] with - StartsWithStrategy[Diagnosis] { - override val columnName: String = ColNames.DP - override val eventBuilder: EventBuilder = HadMainDiagnosis - - override def getCodes: BaseExtractorCodes = codes -} - -final case class HadAssociatedDiagnosisExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[Diagnosis] with - StartsWithStrategy[Diagnosis] { - override val columnName: String = ColNames.DA - override val eventBuilder: EventBuilder = HadAssociatedDiagnosis - - override def getCodes: BaseExtractorCodes = codes -} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosisExtractor.scala deleted file mode 100644 index 8e0af3b8..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosisExtractor.scala +++ /dev/null @@ -1,28 +0,0 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses - -import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoBasicExtractor - -protected trait BasicMcoDiagnosisExtractor extends McoBasicExtractor[Diagnosis] with StartsWithStrategy[Diagnosis] - -case class McoMainDiagnosisExtractor(codes: BaseExtractorCodes) extends BasicMcoDiagnosisExtractor { - override val columnName: String = ColNames.DP - override val eventBuilder: EventBuilder = McoMainDiagnosis - - override def getCodes: BaseExtractorCodes = codes -} - -case class McoAssociatedDiagnosisExtractor(codes: BaseExtractorCodes) extends BasicMcoDiagnosisExtractor { - override val columnName: String = ColNames.DA - override val eventBuilder: EventBuilder = 
McoAssociatedDiagnosis - - override def getCodes: BaseExtractorCodes = codes -} - -case class McoLinkedDiagnosisExtractor(codes: BaseExtractorCodes) extends BasicMcoDiagnosisExtractor { - override val columnName: String = ColNames.DR - override val eventBuilder: EventBuilder = McoLinkedDiagnosis - - override def getCodes: BaseExtractorCodes = codes -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugConfig.scala deleted file mode 100644 index 515155a2..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugConfig.scala +++ /dev/null @@ -1,19 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.drugs - -import fr.polytechnique.cmap.cnam.etl.extractors.{ExtractorCodes, ExtractorConfig} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.DrugClassificationLevel - -class DrugConfig( - val level: DrugClassificationLevel, - val families: List[DrugClassConfig]) extends ExtractorConfig with ExtractorCodes { - override def isEmpty: Boolean = families.isEmpty -} - -object DrugConfig { - def apply(level: DrugClassificationLevel, families: List[DrugClassConfig]): DrugConfig = new DrugConfig( - level, families - ) -} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala similarity index 74% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala index 31d6b550..5c9237e0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActExtractor.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala @@ -1,25 +1,28 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import java.sql.Timestamp import scala.util.Try import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{BiologyDcirAct, DcirAct, EventBuilder, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirSimpleExtractor import fr.polytechnique.cmap.cnam.util.functions.makeTS -abstract sealed class DcirRowActExtractor(codes: BaseExtractorCodes) extends DcirBasicExtractor[MedicalAct] +abstract sealed class DcirRowActExtractor(codes: SimpleExtractorCodes) extends DcirSimpleExtractor[MedicalAct] with StartsWithStrategy[MedicalAct] { final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) + // Implementation of the Extractor Trait + override def getCodes: SimpleExtractorCodes = codes + + // Implementation of the EventRowExtractor override def usedColumns: List[String] = List(ColNames.InstitutionCode, ColNames.GHSCode, ColNames.Sector) ++ super .usedColumns - override def getCodes: BaseExtractorCodes = codes - override def extractStart(r: Row): Timestamp = { Try(super.extractStart(r)) recover { case _ => makeTS(1970, 1, 1) @@ -66,17 +69,20 @@ abstract sealed class DcirRowActExtractor(codes: BaseExtractorCodes) extends Dci private def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) } -final case class DcirMedicalActExtractor(codes: BaseExtractorCodes) +final case class DcirMedicalActExtractor(codes: SimpleExtractorCodes) extends DcirRowActExtractor(codes) { 
+ // Implementation of the SimpleExtractor Trait override val columnName: String = ColNames.CamCode override val eventBuilder: EventBuilder = DcirAct } -final case class DcirBiologyActExtractor(codes: BaseExtractorCodes) +final case class DcirBiologyActExtractor(codes: SimpleExtractorCodes) extends DcirRowActExtractor(codes) { + // Implementation of the SimpleExtractor Trait override val columnName: String = ColNames.BioCode override val eventBuilder: EventBuilder = BiologyDcirAct + // Because BioCode is a Double override def extractValue(row: Row): String = row.getAs[Double](columnName).toString } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/HadCcamActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/HadCcamActExtractor.scala new file mode 100644 index 00000000..65c2baf7 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/HadCcamActExtractor.scala @@ -0,0 +1,15 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts + +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HadCCAMAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.had.HadSimpleExtractor + +final case class HadCcamActExtractor(codes: SimpleExtractorCodes) extends HadSimpleExtractor[MedicalAct] + with StartsWithStrategy[MedicalAct] { + override val columnName: String = ColNames.CCAM + override val eventBuilder: EventBuilder = HadCCAMAct + override def getCodes: SimpleExtractorCodes = codes +} + + diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCcamActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCcamActExtractor.scala similarity index 64% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCcamActExtractor.scala
rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCcamActExtractor.scala index 46ab225d..47f15e07 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCcamActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCcamActExtractor.scala @@ -1,24 +1,25 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import java.sql.Timestamp import me.danielpes.spark.datetime.Period import me.danielpes.spark.datetime.implicits.DateImplicits import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCCAMAct, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mco.McoSimpleExtractor -final case class McoCcamActExtractor(codes: BaseExtractorCodes) extends McoBasicExtractor[MedicalAct] +final case class McoCcamActExtractor(codes: SimpleExtractorCodes) extends McoSimpleExtractor[MedicalAct] with StartsWithStrategy[MedicalAct] { override val columnName: String = ColNames.CCAM override val eventBuilder: EventBuilder = McoCCAMAct override def usedColumns: List[String] = ColNames.CCAMDelayDate :: super.usedColumns - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes override def extractStart(r: Row): Timestamp = { (r.getAs[Timestamp](NewColumns.EstimatedStayStart) + Period(days = getDateOffset(r))).get diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCeCcamActExtractor.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCeCcamActExtractor.scala new file mode 100644 index 00000000..8d893ed6 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCeCcamActExtractor.scala @@ -0,0 +1,17 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts + + +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCeCcamAct, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce.McoCeSimpleExtractor + +final case class McoCeCcamActExtractor(codes: SimpleExtractorCodes) extends McoCeSimpleExtractor[MedicalAct] + with StartsWithStrategy[MedicalAct] { + override val eventBuilder: EventBuilder = McoCeCcamAct + override val columnName: String = ColNames.CamCode + + override def getCodes: SimpleExtractorCodes = codes +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/MedicalActsConfig.scala similarity index 95% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfig.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/MedicalActsConfig.scala index d3bf4312..79c230de 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/MedicalActsConfig.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrActExtractor.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrActExtractor.scala similarity index 64% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrActExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrActExtractor.scala index cea13765..733c1942 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrActExtractor.scala @@ -1,15 +1,16 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, MedicalAct, SsrCCAMAct, SsrCSARRAct} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr.SsrSimpleExtractor -final case class SsrCcamActExtractor(codes: BaseExtractorCodes) extends SsrBasicExtractor[MedicalAct] with +final case class SsrCcamActExtractor(codes: SimpleExtractorCodes) extends SsrSimpleExtractor[MedicalAct] with StartsWithStrategy[MedicalAct] { override val columnName: String = ColNames.CCAM override val eventBuilder: EventBuilder = SsrCCAMAct - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes } /** Extract Csarr codes : @@ -24,10 +25,10 @@ final case class SsrCcamActExtractor(codes: BaseExtractorCodes) extends SsrBasic * The complete terminology can be found here : https://drees.shinyapps.io/dico-snds/?variable=FP_PEC&search=csar&table=T_SSRaa_nnB * For more details see : https://www.atih.sante.fr/sites/default/files/public/content/3302/csarr_2018.pdf */ -final case class 
SsrCsarrActExtractor(codes: BaseExtractorCodes) extends SsrBasicExtractor[MedicalAct] with +final case class SsrCsarrActExtractor(codes: SimpleExtractorCodes) extends SsrSimpleExtractor[MedicalAct] with StartsWithStrategy[MedicalAct] { override val columnName: String = ColNames.CSARR override val eventBuilder: EventBuilder = SsrCSARRAct - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrCeActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrCeActExtractor.scala new file mode 100644 index 00000000..55b768a0 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrCeActExtractor.scala @@ -0,0 +1,15 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts + +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, MedicalAct, SsrCEAct} +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.ssrce.SsrCeSimpleExtractor + +final case class SsrCeActExtractor(codes: SimpleExtractorCodes) extends SsrCeSimpleExtractor[MedicalAct] + with StartsWithStrategy[MedicalAct] { + override def columnName: String = ColNames.CamCode + + override def eventBuilder: EventBuilder = SsrCEAct + + override def getCodes: SimpleExtractorCodes = codes +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/classifications/GhmExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/classifications/GhmExtractor.scala new file mode 100644 index 00000000..93ab9190 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/classifications/GhmExtractor.scala @@ -0,0 +1,16 @@ +// License: BSD 3 clause + +package 
fr.polytechnique.cmap.cnam.etl.extractors.events.classifications + +import fr.polytechnique.cmap.cnam.etl.events.{Classification, EventBuilder, GHMClassification} +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mco.McoSimpleExtractor + +final case class GhmExtractor(codes: SimpleExtractorCodes) extends McoSimpleExtractor[Classification] + with StartsWithStrategy[Classification] { + override val columnName: String = ColNames.GHM + override val eventBuilder: EventBuilder = GHMClassification + + override def getCodes: SimpleExtractorCodes = codes +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/DiagnosesConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/DiagnosesConfig.scala similarity index 92% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/DiagnosesConfig.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/DiagnosesConfig.scala index a10c8cfa..d65ddac4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/DiagnosesConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/DiagnosesConfig.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala new file mode 100644 index 00000000..0646ece9 --- /dev/null +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala @@ -0,0 +1,22 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses + +import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, HadAssociatedDiagnosis, HadMainDiagnosis} +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.had.HadSimpleExtractor + +final case class HadMainDiagnosisExtractor(codes: SimpleExtractorCodes) extends HadSimpleExtractor[Diagnosis] with + StartsWithStrategy[Diagnosis] { + override val columnName: String = ColNames.DP + override val eventBuilder: EventBuilder = HadMainDiagnosis + + override def getCodes: SimpleExtractorCodes = codes +} + +final case class HadAssociatedDiagnosisExtractor(codes: SimpleExtractorCodes) extends HadSimpleExtractor[Diagnosis] with + StartsWithStrategy[Diagnosis] { + override val columnName: String = ColNames.DA + override val eventBuilder: EventBuilder = HadAssociatedDiagnosis + + override def getCodes: SimpleExtractorCodes = codes +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbCimDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/ImbCimDiagnosisExtractor.scala similarity index 65% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbCimDiagnosisExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/ImbCimDiagnosisExtractor.scala index d4fdf9f4..f5900142 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbCimDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/ImbCimDiagnosisExtractor.scala @@ -1,15 +1,16 @@ // License: BSD 3 clause -package 
fr.polytechnique.cmap.cnam.etl.extractors.diagnoses +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import org.apache.spark.sql.{DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, ImbCcamDiagnosis} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, IsInStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.imb.ImbBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.IsInStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.imb.ImbSimpleExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -final case class ImbCimDiagnosisExtractor(codes: BaseExtractorCodes) extends ImbBasicExtractor[Diagnosis] +final case class ImbCimDiagnosisExtractor(codes: SimpleExtractorCodes) extends ImbSimpleExtractor[Diagnosis] with IsInStrategy[Diagnosis] { override def isInExtractorScope(row: Row): Boolean = { @@ -28,5 +29,5 @@ final case class ImbCimDiagnosisExtractor(codes: BaseExtractorCodes) extends Imb override def neededColumns: List[String] = List(ColNames.PatientID, ColNames.Date, ColNames.Encoding, ColNames.Code, ColNames.EndDate) - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala new file mode 100644 index 00000000..6017179b --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala @@ -0,0 +1,29 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses + +import fr.polytechnique.cmap.cnam.etl.events._ +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes 
+import fr.polytechnique.cmap.cnam.etl.extractors.sources.mco.McoSimpleExtractor + +protected trait SimpleMcoDiagnosisExtractor extends McoSimpleExtractor[Diagnosis] with StartsWithStrategy[Diagnosis] + +case class McoMainDiagnosisExtractor(codes: SimpleExtractorCodes) extends SimpleMcoDiagnosisExtractor { + override val columnName: String = ColNames.DP + override val eventBuilder: EventBuilder = McoMainDiagnosis + + override def getCodes: SimpleExtractorCodes = codes +} + +case class McoAssociatedDiagnosisExtractor(codes: SimpleExtractorCodes) extends SimpleMcoDiagnosisExtractor { + override val columnName: String = ColNames.DA + override val eventBuilder: EventBuilder = McoAssociatedDiagnosis + + override def getCodes: SimpleExtractorCodes = codes +} + +case class McoLinkedDiagnosisExtractor(codes: SimpleExtractorCodes) extends SimpleMcoDiagnosisExtractor { + override val columnName: String = ColNames.DR + override val eventBuilder: EventBuilder = McoLinkedDiagnosis + + override def getCodes: SimpleExtractorCodes = codes +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala similarity index 50% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosisExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala index e9890634..08abcd5c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala @@ -1,30 +1,31 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, SsrAssociatedDiagnosis, SsrLinkedDiagnosis, 
SsrMainDiagnosis, SsrTakingOverPurpose} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, StartsWithStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr.SsrSimpleExtractor -protected sealed abstract class SsrDiagnosisExtractor(codes: BaseExtractorCodes) extends SsrBasicExtractor[Diagnosis] with +protected sealed abstract class SsrDiagnosisExtractor(codes: SimpleExtractorCodes) extends SsrSimpleExtractor[Diagnosis] with StartsWithStrategy[Diagnosis] { - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes } -final case class SsrMainDiagnosisExtractor(codes: BaseExtractorCodes) extends SsrDiagnosisExtractor(codes) { +final case class SsrMainDiagnosisExtractor(codes: SimpleExtractorCodes) extends SsrDiagnosisExtractor(codes) { override val columnName: String = ColNames.DP override val eventBuilder: EventBuilder = SsrMainDiagnosis } -final case class SsrAssociatedDiagnosisExtractor(codes: BaseExtractorCodes) extends SsrDiagnosisExtractor(codes) { +final case class SsrAssociatedDiagnosisExtractor(codes: SimpleExtractorCodes) extends SsrDiagnosisExtractor(codes) { override val columnName: String = ColNames.DA override val eventBuilder: EventBuilder = SsrAssociatedDiagnosis } -final case class SsrLinkedDiagnosisExtractor(codes: BaseExtractorCodes) extends SsrDiagnosisExtractor(codes) { +final case class SsrLinkedDiagnosisExtractor(codes: SimpleExtractorCodes) extends SsrDiagnosisExtractor(codes) { override val columnName: String = ColNames.DR override val eventBuilder: EventBuilder = SsrLinkedDiagnosis } -final case class SsrTakingOverPurposeExtractor(codes: BaseExtractorCodes) extends SsrDiagnosisExtractor(codes) { +final case class 
SsrTakingOverPurposeExtractor(codes: SimpleExtractorCodes) extends SsrDiagnosisExtractor(codes) { override val columnName: String = ColNames.FP_PEC override val eventBuilder: EventBuilder = SsrTakingOverPurpose } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugConfig.scala new file mode 100644 index 00000000..3fe1bd20 --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugConfig.scala @@ -0,0 +1,20 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs + +import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig +import fr.polytechnique.cmap.cnam.etl.extractors.codes.ExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.DrugClassificationLevel + +class DrugConfig( + val level: DrugClassificationLevel, + val families: List[DrugClassConfig]) extends ExtractorConfig with ExtractorCodes { + override def isEmpty: Boolean = families.isEmpty +} + +object DrugConfig { + def apply(level: DrugClassificationLevel, families: List[DrugClassConfig]): DrugConfig = new DrugConfig( + level, families + ) +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugExtractor.scala similarity index 98% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugExtractor.scala index 39db7ffb..eb7a11d6 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugExtractor.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugExtractor.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs import java.sql.Timestamp import org.apache.commons.codec.binary.Base64 diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/DrugClassConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/DrugClassConfig.scala similarity index 73% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/DrugClassConfig.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/DrugClassConfig.scala index 75e0aee0..27d37ede 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/DrugClassConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/DrugClassConfig.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification import java.io.Serializable diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/PharmacologicalClassConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/PharmacologicalClassConfig.scala similarity index 92% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/PharmacologicalClassConfig.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/PharmacologicalClassConfig.scala index 71710a7d..7c04685a 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/PharmacologicalClassConfig.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/PharmacologicalClassConfig.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification class PharmacologicalClassConfig( val name: String, diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antidepresseurs.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antidepresseurs.scala similarity index 98% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antidepresseurs.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antidepresseurs.scala index 1f48261c..e293d5aa 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antidepresseurs.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antidepresseurs.scala @@ -1,8 +1,8 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} object Antidepresseurs extends DrugClassConfig { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antiepileptics.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antiepileptics.scala similarity index 98% rename from 
src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antiepileptics.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antiepileptics.scala index 8efa5d44..65fa4673 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antiepileptics.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antiepileptics.scala @@ -1,8 +1,8 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} object Antiepileptics extends DrugClassConfig { override val name: String = "Antiepileptics" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antihypertenseurs.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antihypertenseurs.scala similarity index 99% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antihypertenseurs.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antihypertenseurs.scala index a432c9df..40362257 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Antihypertenseurs.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Antihypertenseurs.scala @@ -1,8 +1,8 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families +package 
fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} object Antihypertenseurs extends DrugClassConfig { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Cardiac.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Cardiac.scala similarity index 72% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Cardiac.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Cardiac.scala index ad62431f..c9e5639c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Cardiac.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Cardiac.scala @@ -1,8 +1,8 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} object Cardiac extends DrugClassConfig { override val name: String = "CardiacTherapy" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Hypnotiques.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Hypnotiques.scala similarity index 97% rename from 
src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Hypnotiques.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Hypnotiques.scala index aeb60d13..e713b47c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Hypnotiques.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Hypnotiques.scala @@ -1,8 +1,8 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} object Hypnotiques extends DrugClassConfig { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Neuroleptiques.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Neuroleptiques.scala similarity index 98% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Neuroleptiques.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Neuroleptiques.scala index 1fa2a503..1f1892ae 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Neuroleptiques.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Neuroleptiques.scala @@ -1,8 +1,8 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families -import 
fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} object Neuroleptiques extends DrugClassConfig { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Opioids.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Opioids.scala similarity index 97% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Opioids.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Opioids.scala index 9dc60a3d..2957dc56 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/Opioids.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/Opioids.scala @@ -1,8 +1,8 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} object Opioids extends DrugClassConfig { override val name: String = "Opioids" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/ProtonPumpInhibitors.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/ProtonPumpInhibitors.scala similarity index 98% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/ProtonPumpInhibitors.scala rename to 
src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/ProtonPumpInhibitors.scala index 817560fb..5fa3c333 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/classification/families/ProtonPumpInhibitors.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/classification/families/ProtonPumpInhibitors.scala @@ -1,8 +1,8 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} object ProtonPumpInhibitors extends DrugClassConfig { override val name: String = "ProtonPumpInhibitors" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/Cip13Level.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/Cip13Level.scala similarity index 72% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/Cip13Level.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/Cip13Level.scala index f9587f11..ae7177af 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/Cip13Level.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/Cip13Level.scala @@ -1,9 +1,9 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig object Cip13Level 
extends DrugClassificationLevel { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/DrugClassificationLevel.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/DrugClassificationLevel.scala similarity index 78% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/DrugClassificationLevel.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/DrugClassificationLevel.scala index 101c872d..f4cd3476 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/DrugClassificationLevel.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/DrugClassificationLevel.scala @@ -1,9 +1,9 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig trait DrugClassificationLevel extends Serializable { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/MoleculeCombinationLevel.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/MoleculeCombinationLevel.scala similarity index 72% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/MoleculeCombinationLevel.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/MoleculeCombinationLevel.scala index b44d7164..e4bf42d7 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/MoleculeCombinationLevel.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/MoleculeCombinationLevel.scala @@ -1,9 +1,9 @@ // License: BSD 3 clause -package 
fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig object MoleculeCombinationLevel extends DrugClassificationLevel { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/PharmacologicalLevel.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/PharmacologicalLevel.scala similarity index 81% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/PharmacologicalLevel.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/PharmacologicalLevel.scala index 0160af29..a5174762 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/PharmacologicalLevel.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/PharmacologicalLevel.scala @@ -1,9 +1,9 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig object PharmacologicalLevel extends DrugClassificationLevel { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/TherapeuticLevel.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/TherapeuticLevel.scala similarity index 78% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/TherapeuticLevel.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/TherapeuticLevel.scala index 
6465ca32..34f71867 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/TherapeuticLevel.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/TherapeuticLevel.scala @@ -1,9 +1,9 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig object TherapeuticLevel extends DrugClassificationLevel { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStaysExtractor.scala similarity index 53% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStaysExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStaysExtractor.scala index 46eb0805..99b1852e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStaysExtractor.scala @@ -1,12 +1,13 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import java.sql.{Date, Timestamp} import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HadHospitalStay, HospitalStay} -import fr.polytechnique.cmap.cnam.etl.extractors.{AlwaysTrueStrategy, BaseExtractorCodes} -import fr.polytechnique.cmap.cnam.etl.extractors.had.HadBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.AlwaysTrueStrategy +import 
fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.had.HadSimpleExtractor -object HadHospitalStaysExtractor extends HadBasicExtractor[HospitalStay] +object HadHospitalStaysExtractor extends HadSimpleExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { override val columnName: String = ColNames.EndDate override val eventBuilder: EventBuilder = HadHospitalStay @@ -15,5 +16,5 @@ object HadHospitalStaysExtractor extends HadBasicExtractor[HospitalStay] override def extractEnd(r: Row): Option[Timestamp] = Some(new Timestamp(r.getAs[Date](ColNames.EndDate).getTime)) - override def getCodes: BaseExtractorCodes = BaseExtractorCodes.empty + override def getCodes: SimpleExtractorCodes = SimpleExtractorCodes.empty } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStaysExtractor.scala similarity index 72% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStaysExtractor.scala index c63058ba..3ebcd417 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStaysExtractor.scala @@ -1,15 +1,16 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import java.sql.{Date, Timestamp} import scala.util.Try import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, McoHospitalStay} -import fr.polytechnique.cmap.cnam.etl.extractors.{AlwaysTrueStrategy, BaseExtractorCodes} -import 
fr.polytechnique.cmap.cnam.etl.extractors.mco.McoBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.AlwaysTrueStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mco.McoSimpleExtractor -object McoHospitalStaysExtractor extends McoBasicExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { +object McoHospitalStaysExtractor extends McoSimpleExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { - override def getCodes: BaseExtractorCodes = BaseExtractorCodes.empty + override def getCodes: SimpleExtractorCodes = SimpleExtractorCodes.empty override def columnName: String = ColNames.EndDate diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoceEmergenciesExtractor.scala similarity index 55% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoceEmergenciesExtractor.scala index 5ded697b..16d450cb 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoceEmergenciesExtractor.scala @@ -1,30 +1,30 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import java.sql.{Date, Timestamp} import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, McoceEmergency} -import fr.polytechnique.cmap.cnam.etl.extractors.{AlwaysTrueStrategy, BaseExtractorCodes} -import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeBasicExtractor +import 
fr.polytechnique.cmap.cnam.etl.extractors.AlwaysTrueStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce.McoCeSimpleExtractor -object McoceEmergenciesExtractor extends McoCeBasicExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { +object McoceEmergenciesExtractor extends McoCeSimpleExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { - /** Checks if the passed Row has the information needed to build the Event. - * - * @param row The row itself. - * @return A boolean value. - */ + // Extractor trait override def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(ColNames.ActCode)) && row .getAs[String](ColNames.ActCode).startsWith("ATU") + override def getCodes: SimpleExtractorCodes = SimpleExtractorCodes.empty + + // EventRowExtractor trait override def extractEnd(r: Row): Option[Timestamp] = Some(new Timestamp(r.getAs[Date](ColNames.EndDate).getTime)) override def extractValue(row: Row): String = extractGroupId(row) + override def usedColumns: List[String] = List(ColNames.EndDate) ++ super.usedColumns + + // SimpleExtractor trait override def columnName: String = ColNames.ActCode override def eventBuilder: EventBuilder = McoceEmergency - override def usedColumns: List[String] = List(ColNames.EndDate) ++ super.usedColumns - - override def getCodes: BaseExtractorCodes = BaseExtractorCodes.empty } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SsrHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SsrHospitalStaysExtractor.scala similarity index 61% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SsrHospitalStaysExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SsrHospitalStaysExtractor.scala index 42a67fe8..b1e335ea 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SsrHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SsrHospitalStaysExtractor.scala @@ -1,12 +1,13 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import java.sql.{Date, Timestamp} import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, SsrHospitalStay} -import fr.polytechnique.cmap.cnam.etl.extractors.{AlwaysTrueStrategy, BaseExtractorCodes} -import fr.polytechnique.cmap.cnam.etl.extractors.ssr.SsrBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.AlwaysTrueStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr.SsrSimpleExtractor -object SsrHospitalStaysExtractor extends SsrBasicExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { +object SsrHospitalStaysExtractor extends SsrSimpleExtractor[HospitalStay] with AlwaysTrueStrategy[HospitalStay] { override val columnName: String = ColNames.EndDate override val eventBuilder: EventBuilder = SsrHospitalStay @@ -22,5 +23,5 @@ object SsrHospitalStaysExtractor extends SsrBasicExtractor[HospitalStay] with Al r.getAs[Int](ColNames.Year).toString } - override def getCodes: BaseExtractorCodes = BaseExtractorCodes.empty + override def getCodes: SimpleExtractorCodes = SimpleExtractorCodes.empty } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchases.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchases.scala similarity index 98% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchases.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchases.scala 
index a91f3e16..81ea1ab8 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchases.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchases.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.molecules +package fr.polytechnique.cmap.cnam.etl.extractors.events.molecules import java.sql.Timestamp import org.apache.spark.sql.{Column, DataFrame, Row} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchases.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchases.scala similarity index 83% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchases.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchases.scala index fa6284c9..9d186d4b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchases.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchases.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.molecules +package fr.polytechnique.cmap.cnam.etl.extractors.events.molecules import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchasesConfig.scala similarity index 79% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesConfig.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchasesConfig.scala index aac8a322..3be421fd 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchasesConfig.scala @@ -1,8 +1,9 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.molecules +package fr.polytechnique.cmap.cnam.etl.extractors.events.molecules -import fr.polytechnique.cmap.cnam.etl.extractors.{ExtractorCodes, ExtractorConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig +import fr.polytechnique.cmap.cnam.etl.extractors.codes.ExtractorCodes /** * Base definition of the config needed by the MoleculePurchases extractor. diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActExtractor.scala similarity index 95% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActExtractor.scala index 8e465de2..3528fd06 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActExtractor.scala @@ -1,13 +1,13 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts +package fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts import org.apache.spark.sql.{Column, DataFrame, Row} import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.{DcirNgapAct, Event, EventBuilder, NgapAct} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirRowExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirRowExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources final case class 
DcirNgapActExtractor(ngapActsConfig: NgapActConfig[NgapWithNatClassConfig]) @@ -73,7 +73,7 @@ final case class DcirNgapActExtractor(ngapActsConfig: NgapActConfig[NgapWithNatC * * @return concatenation of the three codes */ - def extractValue(row: Row): String = { + override def extractValue(row: Row): String = { s"${row.getAs[Int](ColNames.NaturePrestation)}_${row.getAs[String](ngapKeyLetterCol)}_${ row.getAs[Double](ColNames.NgapCoefficient).toString }" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala similarity index 94% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala index 145f69ef..d2349961 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoCeNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala @@ -1,13 +1,13 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts +package fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts import scala.util.Try import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.{Event, EventBuilder, McoCeFbstcNgapAct, McoCeFcstcNgapAct, NgapAct} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor -import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeRowExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce.McoCeRowExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources sealed abstract class McoCeNgapActExtractor(ngapActsConfig: NgapActConfig[NgapActClassConfig]) extends Extractor[NgapAct, NgapActConfig[NgapActClassConfig]] @@ -20,7 +20,7 @@ sealed abstract 
class McoCeNgapActExtractor(ngapActsConfig: NgapActConfig[NgapAc // Implementation of the EventRowExtractor override def usedColumns: List[String] = super.usedColumns ++ List(keyLetterColumn, coeffColumn) - def extractValue(row: Row): String = { + override def extractValue(row: Row): String = { val letter = getNgapLetter(row) val coeff = getNgapCoeff(row) s"PmsiCe_${letter}_${coeff}" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/NgapActClassConfig.scala similarity index 92% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/NgapActClassConfig.scala index ee2da23f..bd585b12 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActClassConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/NgapActClassConfig.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts +package fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts //ngapCoefficients should always be specified with the dot separation for float, as this is how they are coded in the snds. 
// eg: "2.0" should be used instead of "2" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/NgapActConfig.scala similarity index 89% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/NgapActConfig.scala index 86ede2ed..d93f3fe4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/NgapActConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/NgapActConfig.scala @@ -1,8 +1,9 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts +package fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts -import fr.polytechnique.cmap.cnam.etl.extractors.{ExtractorCodes, ExtractorConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig +import fr.polytechnique.cmap.cnam.etl.extractors.codes.ExtractorCodes /** * NgapActConfig defines three different ways to filter for specific ngap acts in the SNDS : diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/McoCeSpecialtyExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/McoCeSpecialtyExtractor.scala similarity index 62% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/McoCeSpecialtyExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/McoCeSpecialtyExtractor.scala index 1ce4bf82..f445a4bb 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/McoCeSpecialtyExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/McoCeSpecialtyExtractor.scala @@ -1,11 +1,12 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.prestations +package 
fr.polytechnique.cmap.cnam.etl.extractors.events.prestations import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, McoCeFbstcMedicalPractitionerClaim, McoCeFcstcMedicalPractitionerClaim, PractitionerClaimSpeciality} -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, IsInStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.IsInStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce.McoCeSimpleExtractor /** * Get specialties of the non medical practitioners in the MCO_CE: @@ -13,7 +14,7 @@ import fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeBasicExtractor * These two columns are complementary as described here : * https://documentation-snds.health-data-hub.fr/fiches/actes_consult_externes.html#les-tables-du-pmsi-version-snds-pour-les-ace **/ -sealed abstract class McoCeSpecialtyExtractor(codes: BaseExtractorCodes) extends McoCeBasicExtractor[PractitionerClaimSpeciality] +sealed abstract class McoCeSpecialtyExtractor(codes: SimpleExtractorCodes) extends McoCeSimpleExtractor[PractitionerClaimSpeciality] with IsInStrategy[PractitionerClaimSpeciality] { override def extractValue(row: Row): String = row.getAs[Int](columnName).toString @@ -21,16 +22,16 @@ sealed abstract class McoCeSpecialtyExtractor(codes: BaseExtractorCodes) extends (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) } - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes } -final case class McoCeFbstcSpecialtyExtractor(codes: BaseExtractorCodes) extends McoCeSpecialtyExtractor(codes) { +final case class McoCeFbstcSpecialtyExtractor(codes: SimpleExtractorCodes) extends McoCeSpecialtyExtractor(codes) { override val columnName: String = ColNames.PractitionnerSpecialtyFbstc override val 
eventBuilder: EventBuilder = McoCeFbstcMedicalPractitionerClaim } -final case class McoCeFcstcSpecialtyExtractor(codes: BaseExtractorCodes) extends McoCeSpecialtyExtractor(codes) { +final case class McoCeFcstcSpecialtyExtractor(codes: SimpleExtractorCodes) extends McoCeSpecialtyExtractor(codes) { override val columnName: String = ColNames.PractitionnerSpecialtyFcstc override val eventBuilder: EventBuilder = McoCeFcstcMedicalPractitionerClaim } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityConfig.scala similarity index 92% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityConfig.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityConfig.scala index 56f4c461..0eb362f1 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityConfig.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.prestations +package fr.polytechnique.cmap.cnam.etl.extractors.events.prestations import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityExtractor.scala similarity index 71% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityExtractor.scala index 
7837f38d..c4bd7f6e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityExtractor.scala @@ -1,16 +1,17 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.prestations +package fr.polytechnique.cmap.cnam.etl.extractors.events.prestations import java.sql.Timestamp import scala.util.Try import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, IsInStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.IsInStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirSimpleExtractor -sealed abstract class DcirPractitionerSpecialityExtractor(codes: BaseExtractorCodes) - extends DcirBasicExtractor[PractitionerClaimSpeciality] with IsInStrategy[PractitionerClaimSpeciality] { +sealed abstract class DcirPractitionerSpecialityExtractor(codes: SimpleExtractorCodes) + extends DcirSimpleExtractor[PractitionerClaimSpeciality] with IsInStrategy[PractitionerClaimSpeciality] { override def usedColumns: List[ColName] = ColNames.ExecPSNum :: super.usedColumns @@ -30,7 +31,7 @@ sealed abstract class DcirPractitionerSpecialityExtractor(codes: BaseExtractorCo (!row.isNullAt(row.fieldIndex(columnName))) & (row.getAs[Integer](columnName) != 0) } - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes } /** @@ -38,7 +39,7 @@ sealed abstract class DcirPractitionerSpecialityExtractor(codes: BaseExtractorCo * If a specialty is available, it extracts the specialty using PSE_SPE_COD and the practitioner * identifier from the database. 
*/ -final case class MedicalPractitionerClaimExtractor(codes: BaseExtractorCodes) +final case class MedicalPractitionerClaimExtractor(codes: SimpleExtractorCodes) extends DcirPractitionerSpecialityExtractor(codes) { override val columnName: String = ColNames.MSpe override val eventBuilder: EventBuilder = MedicalPractitionerClaim @@ -50,7 +51,7 @@ final case class MedicalPractitionerClaimExtractor(codes: BaseExtractorCodes) * If a specialty is available, it extracts the specialty using PSE_ACT_NAT and the practitioner * identifier from the database. */ -final case class NonMedicalPractitionerClaimExtractor(codes: BaseExtractorCodes) +final case class NonMedicalPractitionerClaimExtractor(codes: SimpleExtractorCodes) extends DcirPractitionerSpecialityExtractor(codes) { override val columnName: String = ColNames.NonMSpe override val eventBuilder: EventBuilder = NonMedicalPractitionerClaim diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOverReasonExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOverReasonExtractor.scala similarity index 50% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOverReasonExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOverReasonExtractor.scala index e8a83990..95c9e0df 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOverReasonExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOverReasonExtractor.scala @@ -1,11 +1,12 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.takeOverReasons +package fr.polytechnique.cmap.cnam.etl.extractors.events.takeoverreasons import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HadAssociatedTakeOver, HadMainTakeOver, MedicalTakeOverReason} -import 
fr.polytechnique.cmap.cnam.etl.extractors.{BaseExtractorCodes, IsInStrategy} -import fr.polytechnique.cmap.cnam.etl.extractors.had.HadBasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.IsInStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.had.HadSimpleExtractor -final case class HadMainTakeOverExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[MedicalTakeOverReason] +final case class HadMainTakeOverExtractor(codes: SimpleExtractorCodes) extends HadSimpleExtractor[MedicalTakeOverReason] with IsInStrategy[MedicalTakeOverReason] { override val columnName: String = ColNames.PEC_PAL @@ -13,15 +14,15 @@ final case class HadMainTakeOverExtractor(codes: BaseExtractorCodes) extends Had override def extractValue(row: Row): String = row.getAs[Int](columnName).toString - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes } -final case class HadAssociatedTakeOverExtractor(codes: BaseExtractorCodes) extends HadBasicExtractor[MedicalTakeOverReason] +final case class HadAssociatedTakeOverExtractor(codes: SimpleExtractorCodes) extends HadSimpleExtractor[MedicalTakeOverReason] with IsInStrategy[MedicalTakeOverReason] { override val columnName: String = ColNames.PEC_ASS override val eventBuilder: EventBuilder = HadAssociatedTakeOver override def extractValue(row: Row): String = row.getAs[Int](columnName).toString - override def getCodes: BaseExtractorCodes = codes + override def getCodes: SimpleExtractorCodes = codes } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractor.scala similarity index 97% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirRowExtractor.scala rename to 
src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractor.scala index 1f8d49ef..2dcb3db2 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractor.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.dcir +package fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir import java.sql.Timestamp import scala.util.Try diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSimpleExtractor.scala similarity index 57% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirBasicExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSimpleExtractor.scala index 0bb81912..ccbf8f12 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirBasicExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSimpleExtractor.scala @@ -1,13 +1,13 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.dcir +package fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.AnyEvent -import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.SimpleExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -trait DcirBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with DcirRowExtractor { +trait DcirSimpleExtractor[EventType <: AnyEvent] extends DcirRowExtractor with SimpleExtractor[EventType] { def getInput(sources: Sources): DataFrame = sources.dcir.get.select(neededColumns.map(col): _*) } diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSource.scala similarity index 72% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSource.scala index 009b1295..52d59b66 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/dcir/DcirSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSource.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.dcir +package fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames @@ -19,27 +19,6 @@ trait DcirSource extends ColumnNames { lazy val Sector: String = "ER_ETE_F__PRS_PPU_SEC" lazy val NaturePrestation: ColName = "PRS_NAT_REF" lazy val NgapCoefficient: ColName = "PRS_ACT_CFT" - - lazy val core = ( - PatientID, DcirEventStart, DcirFluxDate, FlowDistributionDate, FlowTreatementDate, - FlowEmitterId, FlowEmitterNumber, OrderId, OrgId - ) - lazy val all = List( - PatientID, - CamCode, - GHSCode, - InstitutionCode, - Sector, - DcirEventStart, - MSpe, - NonMSpe, - ExecPSNum, - DcirFluxDate, - NaturePrestation, - NgapCoefficient - ) - - lazy val FlowDistributionDate: ColName = "FLX_DIS_DTD" lazy val FlowTreatementDate: ColName = "FLX_TRT_DTD" lazy val FlowEmitterType: ColName = "FLX_EMT_TYP" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala similarity index 92% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadRowExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala index 0238c691..d504b9fe 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.had +package fr.polytechnique.cmap.cnam.etl.extractors.sources.had import java.sql.Timestamp import org.apache.spark.sql.Row diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSimpleExtractor.scala similarity index 60% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadBasicExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSimpleExtractor.scala index a7db967e..a2b6acb9 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadBasicExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSimpleExtractor.scala @@ -1,14 +1,14 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.had +package fr.polytechnique.cmap.cnam.etl.extractors.sources.had import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.AnyEvent -import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.SimpleExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -trait HadBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with HadRowExtractor { +trait HadSimpleExtractor[EventType <: AnyEvent] extends HadRowExtractor with SimpleExtractor[EventType] { override def getInput(sources: Sources): DataFrame = sources.had.get.estimateStayStartTime .select(neededColumns.map(col): _*) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSource.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSource.scala similarity index 97% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSource.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSource.scala index ab4163a9..4bf302ec 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSource.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.had +package fr.polytechnique.cmap.cnam.etl.extractors.sources.had import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbRowExtractor.scala similarity index 93% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbRowExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbRowExtractor.scala index 547bf771..105dea5e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbRowExtractor.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.imb +package fr.polytechnique.cmap.cnam.etl.extractors.sources.imb import java.sql.{Date, Timestamp} import scala.util.Try @@ -18,8 +18,6 @@ import fr.polytechnique.cmap.cnam.util.functions.makeTS */ trait ImbRowExtractor extends ImbSource with EventRowExtractor { - def extractCode(row: Row): String = row.getAs[String](ColNames.Code) - def extractEncoding(row: Row): String = row.getAs[String](ColNames.Encoding) override def extractPatientId(row: Row): String = row.getAs[String](ColNames.PatientID) diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbSimpleExtractor.scala similarity index 52% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbBasicExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbSimpleExtractor.scala index d068cad5..3bdf4a24 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbBasicExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbSimpleExtractor.scala @@ -1,15 +1,13 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.imb +package fr.polytechnique.cmap.cnam.etl.extractors.sources.imb import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.AnyEvent -import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.SimpleExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -trait ImbBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with ImbRowExtractor { +trait ImbSimpleExtractor[EventType <: AnyEvent] extends ImbRowExtractor with SimpleExtractor[EventType]{ def getInput(sources: Sources): DataFrame = sources.irImb.get.select(neededColumns.map(col): _*) - - override def usedColumns: List[String] = super.usedColumns } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbSource.scala similarity index 86% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbSource.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbSource.scala index 0939dcc4..bc2f1ca4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/imb/ImbSource.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbSource.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.imb +package fr.polytechnique.cmap.cnam.etl.extractors.sources.imb import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractor.scala similarity index 95% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoRowExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractor.scala index 6d40845c..6cfaae64 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractor.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.mco +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mco import java.sql.Timestamp import org.apache.spark.sql.Row diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSimpleExtractor.scala similarity index 59% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoBasicExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSimpleExtractor.scala index e11dbac3..f131226a 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoBasicExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSimpleExtractor.scala @@ -1,14 +1,14 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.mco +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mco import org.apache.spark.sql.DataFrame import 
org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.AnyEvent -import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.SimpleExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -trait McoBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with McoRowExtractor { +trait McoSimpleExtractor[EventType <: AnyEvent] extends McoRowExtractor with SimpleExtractor[EventType]{ def getInput(sources: Sources): DataFrame = sources.mco.get.select(neededColumns.map(col): _*).estimateStayStartTime } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSource.scala similarity index 98% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSource.scala index ed3ffa66..26e57fa0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSource.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.mco +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mco import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractor.scala similarity index 93% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeRowExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractor.scala index 489c2bb6..7be5b990 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeRowExtractor.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractor.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce import java.sql.Timestamp import org.apache.spark.sql.Row diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSimpleExtractor.scala similarity index 54% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeBasicExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSimpleExtractor.scala index 8fd4da6d..38096ea3 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeBasicExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSimpleExtractor.scala @@ -1,11 +1,11 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.AnyEvent -import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.SimpleExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -trait McoCeBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with McoCeRowExtractor { +trait McoCeSimpleExtractor[EventType <: AnyEvent] extends McoCeRowExtractor with SimpleExtractor[EventType]{ def getInput(sources: Sources): DataFrame = sources.mcoCe.get.select(neededColumns.map(col): _*) } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSource.scala similarity index 95% rename 
from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSource.scala index 6c53b728..a1f73b82 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/mcoCe/McoCeSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSource.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.mcoCe +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala similarity index 91% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrRowExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala index 2c761e54..0414e254 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala @@ -1,10 +1,10 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.ssr +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr import java.sql.Timestamp import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} -import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor, ExtractorCodes} +import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources trait SsrRowExtractor extends SsrSource with EventRowExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrBasicExtractor.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSimpleExtractor.scala similarity index 59% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrBasicExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSimpleExtractor.scala index e33ad4d6..d21640bc 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrBasicExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSimpleExtractor.scala @@ -1,13 +1,13 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ssr +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col import fr.polytechnique.cmap.cnam.etl.events.AnyEvent -import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.SimpleExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -trait SsrBasicExtractor[EventType <: AnyEvent] extends BasicExtractor[EventType] with SsrRowExtractor { +trait SsrSimpleExtractor[EventType <: AnyEvent] extends SsrRowExtractor with SimpleExtractor[EventType]{ def getInput(sources: Sources): DataFrame = sources.ssr.get.estimateStayStartTime.select(neededColumns.map(col): _*) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSource.scala similarity index 98% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSource.scala index b6efc9e1..9f91beb4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSource.scala @@ -1,4 +1,4 @@ -package 
fr.polytechnique.cmap.cnam.etl.extractors.ssr +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractor.scala similarity index 88% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeRowExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractor.scala index 0b640fd1..0f728d57 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractor.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ssrce +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssrce import java.sql.Timestamp import org.apache.spark.sql.Row diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeBasicExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeSimpleExtractor.scala similarity index 57% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeBasicExtractor.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeSimpleExtractor.scala index 6b10e010..51365d49 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeBasicExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeSimpleExtractor.scala @@ -1,14 +1,13 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ssrce +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssrce import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col import 
fr.polytechnique.cmap.cnam.etl.events.AnyEvent -import fr.polytechnique.cmap.cnam.etl.extractors.BasicExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.SimpleExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources -trait SsrCeBasicExtractor [EventType <: AnyEvent] extends BasicExtractor[EventType] with SsrCeRowExtractor { +trait SsrCeSimpleExtractor [EventType <: AnyEvent] extends SsrCeRowExtractor with SimpleExtractor[EventType]{ def getInput(sources: Sources): DataFrame = sources.ssrCe.get.select(neededColumns.map(col): _*) - } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeSource.scala similarity index 87% rename from src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeSource.scala rename to src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeSource.scala index 568ba0a1..bc970006 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssrce/SsrCeSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeSource.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ssrce +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssrce import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala index edaaac65..48470be2 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfig.scala @@ -6,9 +6,9 @@ import java.time.LocalDate import pureconfig.generic.auto._ import fr.polytechnique.cmap.cnam.etl.config.BaseConfig import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugConfig -import 
fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.{Cip13Level, DrugClassificationLevel} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.DrugConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.{Cip13Level, DrugClassificationLevel} case class BulkConfig( input: StudyConfig.InputPaths, diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfigLoader.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfigLoader.scala index 28d48c42..d194a5d4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfigLoader.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkConfigLoader.scala @@ -2,7 +2,7 @@ package fr.polytechnique.cmap.cnam.study.bulk import pureconfig.ConfigReader import fr.polytechnique.cmap.cnam.etl.config.ConfigLoader -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig import fr.polytechnique.cmap.cnam.study.fall.config.FallDrugClassConfig class BulkConfigLoader extends ConfigLoader { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala index 0d8442a0..af5724e6 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala @@ -5,10 +5,10 @@ package fr.polytechnique.cmap.cnam.study.bulk import java.io.PrintWriter import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main -import fr.polytechnique.cmap.cnam.etl.extractors.acts.{DcirMedicalActExtractor, McoCcamActExtractor, McoCeCcamActExtractor} -import 
fr.polytechnique.cmap.cnam.etl.extractors.classifications.GhmExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.{DcirMedicalActExtractor, McoCcamActExtractor, McoCeCcamActExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.classifications.GhmExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.DrugExtractor import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.sources.Sources diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala index c692606c..8b5fc199 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala @@ -6,7 +6,7 @@ import scala.collection.mutable import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event, FollowUp, Outcome} -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala index 233fe62a..25e5adc0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala @@ -6,7 +6,7 @@ import scala.collection.mutable import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events.DcirAct -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.patients.Patient diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala index a5124dc0..b4e15d68 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala @@ -8,12 +8,12 @@ import me.danielpes.spark.datetime.implicits._ import pureconfig.generic.auto._ import fr.polytechnique.cmap.cnam.etl.config.BaseConfig import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig -import fr.polytechnique.cmap.cnam.etl.extractors.acts.MedicalActsConfig -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.DiagnosesConfig -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugConfig -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification._ -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.{Antidepresseurs, Antihypertenseurs, Hypnotiques, Neuroleptiques} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.{DrugClassificationLevel, TherapeuticLevel} +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.MedicalActsConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.DiagnosesConfig +import 
fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.DrugConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification._ +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.{Antidepresseurs, Antihypertenseurs, Hypnotiques, Neuroleptiques} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.{DrugClassificationLevel, TherapeuticLevel} import fr.polytechnique.cmap.cnam.etl.transformers.exposures._ import fr.polytechnique.cmap.cnam.etl.transformers.interaction.InteractionTransformerConfig import fr.polytechnique.cmap.cnam.study.fall.codes._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigLoader.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigLoader.scala index 514bb6ec..98edb02a 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigLoader.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigLoader.scala @@ -4,7 +4,7 @@ package fr.polytechnique.cmap.cnam.study.fall.config import pureconfig.ConfigReader import fr.polytechnique.cmap.cnam.etl.config.ConfigLoader -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.DrugClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.DrugClassConfig import fr.polytechnique.cmap.cnam.study.fall.fractures.BodySite class FallConfigLoader extends ConfigLoader { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallDrugClassConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallDrugClassConfig.scala index 9c0211cf..cfae338b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallDrugClassConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallDrugClassConfig.scala @@ -2,8 +2,8 @@ package fr.polytechnique.cmap.cnam.study.fall.config -import 
fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification._ -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.{Antidepresseurs, Antihypertenseurs, Hypnotiques, Neuroleptiques} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification._ +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.{Antidepresseurs, Antihypertenseurs, Hypnotiques, Neuroleptiques} object FallDrugClassConfig { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala index 10c2b2e3..48590593 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala @@ -4,18 +4,18 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, Event, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.acts._ +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.unionDatasets class ActsExtractor(config: MedicalActsConfig) { def extract(sources: Sources): Dataset[Event[MedicalAct]] = { - val dcirMedicalAct = DcirMedicalActExtractor(BaseExtractorCodes(config.dcirCodes)).extract(sources) + val dcirMedicalAct = DcirMedicalActExtractor(SimpleExtractorCodes(config.dcirCodes)).extract(sources) .filter(act => act.groupID != DcirAct.groupID.Unknown) // filter out unknown source acts .filter(act => act.groupID != DcirAct.groupID.PublicAmbulatory) //filter out public amb - val mcoCEMedicalActs = 
McoCeCcamActExtractor(BaseExtractorCodes(config.mcoCECodes)).extract(sources) - val mcoMedicalActs = McoCcamActExtractor(BaseExtractorCodes(config.mcoCCAMCodes)).extract(sources) + val mcoCEMedicalActs = McoCeCcamActExtractor(SimpleExtractorCodes(config.mcoCECodes)).extract(sources) + val mcoMedicalActs = McoCcamActExtractor(SimpleExtractorCodes(config.mcoCCAMCodes)).extract(sources) unionDatasets(dcirMedicalAct, mcoCEMedicalActs, mcoMedicalActs) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala index c4024851..012224eb 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.Cardiac -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.Cardiac +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel import fr.polytechnique.cmap.cnam.etl.sources.Sources object CardiacExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ControlDrugs.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ControlDrugs.scala index f26e8ecc..337a0ed1 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ControlDrugs.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ControlDrugs.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families._ -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.TherapeuticLevel -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families._ +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources object ControlDrugs { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala index 83132c9e..d8d6edff 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala @@ -4,8 +4,8 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.unionDatasets @@ -13,9 +13,9 @@ class DiagnosisExtractor(config: DiagnosesConfig) { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = 
MainDiagnosisFallExtractor(BaseExtractorCodes(config.dpCodes)).extract(sources) - val linkedDiag = LinkedDiagnosisFallExtractor(BaseExtractorCodes(config.drCodes)).extract(sources) - val dasDiag = AssociatedDiagnosisFallExtractor(BaseExtractorCodes(config.daCodes)).extract(sources) + val mainDiag = MainDiagnosisFallExtractor(SimpleExtractorCodes(config.dpCodes)).extract(sources) + val linkedDiag = LinkedDiagnosisFallExtractor(SimpleExtractorCodes(config.drCodes)).extract(sources) + val dasDiag = AssociatedDiagnosisFallExtractor(SimpleExtractorCodes(config.daCodes)).extract(sources) unionDatasets(mainDiag, linkedDiag, dasDiag) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DrugsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DrugsExtractor.scala index 2b0e589a..eb77da08 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DrugsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DrugsExtractor.scala @@ -4,7 +4,7 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources class DrugsExtractor(drugConfig: DrugConfig) { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/EpilepticsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/EpilepticsExtractor.scala index ba5c0bd3..4be0393c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/EpilepticsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/EpilepticsExtractor.scala @@ -4,17 +4,17 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import 
org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{ImbCimDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.{ImbCimDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.unionDatasets object EpilepticsExtractor { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = McoMainDiagnosisExtractor(BaseExtractorCodes(List("G40"))).extract(sources) - val linkedDiag = McoLinkedDiagnosisExtractor(BaseExtractorCodes(List("G40"))).extract(sources) - val imbDiag = ImbCimDiagnosisExtractor(BaseExtractorCodes(List("G40"))).extract(sources) + val mainDiag = McoMainDiagnosisExtractor(SimpleExtractorCodes(List("G40"))).extract(sources) + val linkedDiag = McoLinkedDiagnosisExtractor(SimpleExtractorCodes(List("G40"))).extract(sources) + val imbDiag = ImbCimDiagnosisExtractor(SimpleExtractorCodes(List("G40"))).extract(sources) unionDatasets(mainDiag, linkedDiag, imbDiag) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala index 5e468855..44efea4e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} 
-import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.Antihypertenseurs -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.Antihypertenseurs +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel import fr.polytechnique.cmap.cnam.etl.sources.Sources object HTAExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala index a520d1c7..2a7f438c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.ProtonPumpInhibitors -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.ProtonPumpInhibitors +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel import fr.polytechnique.cmap.cnam.etl.sources.Sources object IPPExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala index 23da2b7a..18cbb4db 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala @@ -3,9 +3,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{McoAssociatedDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.mco.McoRowExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.{McoAssociatedDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mco.McoRowExtractor import fr.polytechnique.cmap.cnam.study.fall.fractures.Surgery trait ClassifyWeight extends Surgery { @@ -28,20 +28,20 @@ trait ClassifyWeight extends Surgery { } } -class MainDiagnosisFallExtractor(codes: BaseExtractorCodes) extends McoMainDiagnosisExtractor(codes) with ClassifyWeight +class MainDiagnosisFallExtractor(codes: SimpleExtractorCodes) extends McoMainDiagnosisExtractor(codes) with ClassifyWeight object MainDiagnosisFallExtractor { - def apply(codes: BaseExtractorCodes): MainDiagnosisFallExtractor = new MainDiagnosisFallExtractor(codes) + def apply(codes: SimpleExtractorCodes): MainDiagnosisFallExtractor = new MainDiagnosisFallExtractor(codes) } -class AssociatedDiagnosisFallExtractor(codes: BaseExtractorCodes) extends McoAssociatedDiagnosisExtractor(codes) with ClassifyWeight +class AssociatedDiagnosisFallExtractor(codes: SimpleExtractorCodes) extends McoAssociatedDiagnosisExtractor(codes) with ClassifyWeight object AssociatedDiagnosisFallExtractor { - def apply(codes: BaseExtractorCodes): AssociatedDiagnosisFallExtractor = new AssociatedDiagnosisFallExtractor(codes) + def 
apply(codes: SimpleExtractorCodes): AssociatedDiagnosisFallExtractor = new AssociatedDiagnosisFallExtractor(codes) } -class LinkedDiagnosisFallExtractor(codes: BaseExtractorCodes) extends McoLinkedDiagnosisExtractor(codes) with ClassifyWeight +class LinkedDiagnosisFallExtractor(codes: SimpleExtractorCodes) extends McoLinkedDiagnosisExtractor(codes) with ClassifyWeight object LinkedDiagnosisFallExtractor { - def apply(codes: BaseExtractorCodes): LinkedDiagnosisFallExtractor = new LinkedDiagnosisFallExtractor(codes) + def apply(codes: SimpleExtractorCodes): LinkedDiagnosisFallExtractor = new LinkedDiagnosisFallExtractor(codes) } \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala index 6b642310..70a0decf 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.Opioids -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.Opioids +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel import fr.polytechnique.cmap.cnam.etl.sources.Sources object OpioidsExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala index b92da34a..474aa684 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala @@ -8,9 +8,9 @@ import me.danielpes.spark.datetime.Period import me.danielpes.spark.datetime.implicits._ import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig import fr.polytechnique.cmap.cnam.etl.config.{BaseConfig, ConfigLoader} -import fr.polytechnique.cmap.cnam.etl.extractors.acts.MedicalActsConfig -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.DiagnosesConfig -import fr.polytechnique.cmap.cnam.etl.extractors.molecules.MoleculePurchasesConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.MedicalActsConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.DiagnosesConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.molecules.MoleculePurchasesConfig import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientsConfig import fr.polytechnique.cmap.cnam.etl.transformers.exposures._ import fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformerConfig diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala index 3ac27403..e45b892f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala @@ -8,8 +8,8 @@ import scala.collection.mutable.ListBuffer import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor -import 
fr.polytechnique.cmap.cnam.etl.extractors.molecules.MoleculePurchases +import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.molecules.MoleculePurchases import fr.polytechnique.cmap.cnam.etl.extractors.patients.Patients import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/Diagnoses.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/Diagnoses.scala index b82e3fd4..e4076e8e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/Diagnoses.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/Diagnoses.scala @@ -4,8 +4,8 @@ package fr.polytechnique.cmap.cnam.study.pioglitazone.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions @@ -13,9 +13,9 @@ class Diagnoses(config: DiagnosesConfig) { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = McoMainDiagnosisExtractor(BaseExtractorCodes(config.dpCodes)).extract(sources) - val linkedDiag = McoLinkedDiagnosisExtractor(BaseExtractorCodes(config.drCodes)).extract(sources) - val associatedDiag = McoAssociatedDiagnosisExtractor(BaseExtractorCodes(config.daCodes)).extract(sources) + val mainDiag = McoMainDiagnosisExtractor(SimpleExtractorCodes(config.dpCodes)).extract(sources) + val linkedDiag = 
McoLinkedDiagnosisExtractor(SimpleExtractorCodes(config.drCodes)).extract(sources) + val associatedDiag = McoAssociatedDiagnosisExtractor(SimpleExtractorCodes(config.daCodes)).extract(sources) //val imbDiag = ImbDiagnosisExtractor.extract(sources, config.imbCodes.toSet) functions.unionDatasets(mainDiag, linkedDiag, associatedDiag)//, imbDiag) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/MedicalActs.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/MedicalActs.scala index 6a629a9f..6457a200 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/MedicalActs.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/extractors/MedicalActs.scala @@ -4,16 +4,16 @@ package fr.polytechnique.cmap.cnam.study.pioglitazone.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Event, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.acts._ +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.unionDatasets class MedicalActs(config: MedicalActsConfig) { def extract(sources: Sources): Dataset[Event[MedicalAct]] = { - val dcirActs = DcirMedicalActExtractor(BaseExtractorCodes(config.dcirCodes)).extract(sources) - val ccamActs = McoCcamActExtractor(BaseExtractorCodes(config.mcoCCAMCodes)).extract(sources) + val dcirActs = DcirMedicalActExtractor(SimpleExtractorCodes(config.dcirCodes)).extract(sources) + val ccamActs = McoCcamActExtractor(SimpleExtractorCodes(config.mcoCCAMCodes)).extract(sources) //val cimActs = McoCimMedicalActExtractor(BaseExtractorCodes(config.mcoCIMCodes)).extract(sources) unionDatasets(dcirActs, ccamActs) //, cimActs diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneConfig.scala index 7797e179..77e5e45f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneConfig.scala @@ -7,8 +7,8 @@ import pureconfig.generic.auto._ import me.danielpes.spark.datetime.implicits._ import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig import fr.polytechnique.cmap.cnam.etl.config.{BaseConfig, ConfigLoader} -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.DiagnosesConfig -import fr.polytechnique.cmap.cnam.etl.extractors.molecules.MoleculePurchasesConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.DiagnosesConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.molecules.MoleculePurchasesConfig import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientsConfig import fr.polytechnique.cmap.cnam.etl.transformers.exposures._ import fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformerConfig diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala index a1e6a873..fd7da8bf 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala @@ -7,8 +7,8 @@ import scala.collection.mutable.ListBuffer import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.molecules.MoleculePurchases +import 
fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.molecules.MoleculePurchases import fr.polytechnique.cmap.cnam.etl.extractors.patients.Patients import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/extractors/Diagnoses.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/extractors/Diagnoses.scala index e748357b..5fe80f52 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/extractors/Diagnoses.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/extractors/Diagnoses.scala @@ -4,8 +4,8 @@ package fr.polytechnique.cmap.cnam.study.rosiglitazone.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions @@ -13,9 +13,9 @@ class Diagnoses(config: DiagnosesConfig) { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = McoMainDiagnosisExtractor(BaseExtractorCodes(config.dpCodes)).extract(sources) - val linkedDiag = McoLinkedDiagnosisExtractor(BaseExtractorCodes(config.drCodes)).extract(sources) - val associatedDiag = McoAssociatedDiagnosisExtractor(BaseExtractorCodes(config.daCodes)).extract(sources) + val mainDiag = McoMainDiagnosisExtractor(SimpleExtractorCodes(config.dpCodes)).extract(sources) + val linkedDiag = McoLinkedDiagnosisExtractor(SimpleExtractorCodes(config.drCodes)).extract(sources) + val associatedDiag = 
McoAssociatedDiagnosisExtractor(SimpleExtractorCodes(config.daCodes)).extract(sources) functions.unionDatasets(mainDiag, linkedDiag, associatedDiag) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractorSuite.scala index 5471a21e..9ffe7fb8 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractorSuite.scala @@ -14,7 +14,7 @@ class EventRowExtractorSuite extends SharedContext { def extractStart(r: Row) = new Timestamp(0) } - "extractGroupId" should "return the group ID for imb (always 'imb')" in { + "extractGroupId" should "return NA" in { // Given val expected = "NA" @@ -26,8 +26,20 @@ class EventRowExtractorSuite extends SharedContext { assert(result == expected) } + "extractValue" should "return NA" in { - "weight" should "return the weight value" in { + // Given + val expected = "NA" + + // When + val result = MockRowExtractor.extractGroupId(Row()) + + // Then + assert(result == expected) + } + + + "weight" should "return 0.0" in { // Given val expected = 0.0 @@ -40,7 +52,7 @@ class EventRowExtractorSuite extends SharedContext { } - "end" should "compute the end date of the event" in { + "end" should "return None" in { // Given val expected: Option[Timestamp] = None diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/PrescriptionExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/PrescriptionExtractorSuite.scala deleted file mode 100644 index 198c7c2f..00000000 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/PrescriptionExtractorSuite.scala +++ /dev/null @@ -1,46 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors - -import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema -import 
org.apache.spark.sql.types.{StringType, StructField, StructType} -import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, EventBuilder, EventCategory} -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirRowExtractor - - -trait MockEvent extends AnyEvent with EventBuilder - -object MockEventobject extends MockEvent { - override val category: EventCategory[AnyEvent] = "NA" -} - -class PrescriptionExtractorSuite extends SharedContext { - - object MockPrescriptionExtractor extends DcirRowExtractor - - "extractGroupId" should "return the group ID for done values" in { - // Given - val schema = StructType( - Seq( - StructField("FLX_DIS_DTD", StringType), - StructField("FLX_TRT_DTD", StringType), - StructField("FLX_EMT_TYP", StringType), - StructField("FLX_EMT_NUM", StringType), - StructField("FLX_EMT_ORD", StringType), - StructField("ORG_CLE_NUM", StringType), - StructField("DCT_ORD_NUM", StringType) - ) - ) - - val values = Array[Any]("2014-08-01", "2014-07-17", "1", "17", "0", "01C673000", "1749") - val r = new GenericRowWithSchema(values, schema) - val expected = "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMwMDBfMTc0OQ==" - - // When - val result = MockPrescriptionExtractor.extractGroupId(r) - - // Then - assert(result == expected) - } -} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirBiologyActsSuite.scala similarity index 87% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirBiologyActsSuite.scala index 42cc922d..b913a522 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirBiologyActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirBiologyActsSuite.scala @@ -1,11 +1,11 @@ -package 
fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types._ import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{BiologyDcirAct, DcirAct, Event, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirSource +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirSource import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -32,7 +32,7 @@ class DcirBiologyActsSuite extends SharedContext { it should "return false when no code is found in the row" in { // Given - val codes = BaseExtractorCodes(List("AAAA", "BBBB")) + val codes = SimpleExtractorCodes(List("AAAA", "BBBB")) val inputArray = Array[Any]("Patient_A", "CCCC", 1D, 0D, 1D, makeTS(2010, 1, 1)) val inputRow = new GenericRowWithSchema(inputArray, schema) @@ -54,7 +54,7 @@ class DcirBiologyActsSuite extends SharedContext { val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor(BaseExtractorCodes(List("AAAA", "BBBB"))).extractGroupId(input) + val result = DcirBiologyActExtractor(SimpleExtractorCodes(List("AAAA", "BBBB"))).extractGroupId(input) // Then assert(result == DcirAct.groupID.PrivateAmbulatory) @@ -67,7 +67,7 @@ class DcirBiologyActsSuite extends SharedContext { val array = Array[Any](1D) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) + val result = DcirBiologyActExtractor(SimpleExtractorCodes.empty).extractGroupId(input) // Then assert(result == DcirAct.groupID.PublicAmbulatory) @@ -84,7 +84,7 @@ class DcirBiologyActsSuite extends SharedContext 
{ val array = Array[Any](null, null) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) + val result = DcirBiologyActExtractor(SimpleExtractorCodes.empty).extractGroupId(input) // Then assert(result == DcirAct.groupID.Liberal) @@ -101,7 +101,7 @@ class DcirBiologyActsSuite extends SharedContext { val array = Array[Any](null, 0D, 4D) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) + val result = DcirBiologyActExtractor(SimpleExtractorCodes.empty).extractGroupId(input) // Then assert(result == DcirAct.groupID.PrivateAmbulatory) @@ -118,7 +118,7 @@ class DcirBiologyActsSuite extends SharedContext { val array = Array[Any](null, 1D, 4D) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) + val result = DcirBiologyActExtractor(SimpleExtractorCodes.empty).extractGroupId(input) // Then assert(result == DcirAct.groupID.Unknown) @@ -133,7 +133,7 @@ class DcirBiologyActsSuite extends SharedContext { val array = Array[Any](0D, 2D, 6D) val input = new GenericRowWithSchema(array, schema) // When - val result = DcirBiologyActExtractor(BaseExtractorCodes.empty).extractGroupId(input) + val result = DcirBiologyActExtractor(SimpleExtractorCodes.empty).extractGroupId(input) // Then assert(result == DcirAct.groupID.DcirAct) @@ -179,7 +179,7 @@ class DcirBiologyActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val codes = BaseExtractorCodes(List("238")) + val codes = SimpleExtractorCodes(List("238")) val input = sqlCtx.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActsSuite.scala 
similarity index 94% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActsSuite.scala index 9350a843..1c099c4e 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/DcirMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActsSuite.scala @@ -1,10 +1,10 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import org.apache.spark.sql.types._ import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.dcir.DcirSource +import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirSource class DcirMedicalActsSuite extends SharedContext { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/HadMedicalActsSuite.scala similarity index 84% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadMedicalActsSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/HadMedicalActsSuite.scala index 45d79e6e..aeed4987 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/HadMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/HadMedicalActsSuite.scala @@ -1,8 +1,8 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, HadCCAMAct, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import 
fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -13,7 +13,7 @@ class HadMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ccamCodes = BaseExtractorCodes(List("HPQD001")) + val ccamCodes = SimpleExtractorCodes(List("HPQD001")) val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") val expected = Seq[Event[MedicalAct]]( HadCCAMAct("patient02", "10000201_30000150_2019", "HPQD001", makeTS(2019, 12, 24)), @@ -42,7 +42,7 @@ class HadMedicalActsSuite extends SharedContext { val input = Sources(had = Some(had)) // When - val result = HadCcamActExtractor(BaseExtractorCodes.empty).extract(input) + val result = HadCcamActExtractor(SimpleExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCEMedicalActsSuite.scala similarity index 84% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCEMedicalActsSuite.scala index d29e680c..8c363768 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoCEMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCEMedicalActsSuite.scala @@ -1,13 +1,13 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCeCcamAct, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import 
fr.polytechnique.cmap.cnam.etl.extractors.mcoCe.McoCeSource +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce.McoCeSource import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -16,7 +16,7 @@ class McoCEMedicalActsSuite extends SharedContext { "isInStudy" should "return true if row is in study" in { val colNames = new McoCeSource {}.ColNames // Given - val codes = BaseExtractorCodes(List("coloscopie")) + val codes = SimpleExtractorCodes(List("coloscopie")) val schema = StructType( StructField(colNames.PatientID, StringType) :: StructField(colNames.CamCode, StringType) :: @@ -36,7 +36,7 @@ class McoCEMedicalActsSuite extends SharedContext { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val cim10Codes = BaseExtractorCodes(List("DEM")) + val cim10Codes = SimpleExtractorCodes(List("DEM")) val mcoCe = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") val expected = Seq[Event[MedicalAct]]( McoCeCcamAct("200410", "190000059_00022621_2014", "DEMP002", makeTS(2014, 4, 18)) @@ -63,7 +63,7 @@ class McoCEMedicalActsSuite extends SharedContext { val input = Sources(mcoCe = Some(mcoCe)) // When - val result = McoCeCcamActExtractor(BaseExtractorCodes.empty).extract(input) + val result = McoCeCcamActExtractor(SimpleExtractorCodes.empty).extract(input) // Then assertDSs(expected, result) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoMedicalActsSuite.scala similarity index 87% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoMedicalActsSuite.scala index a75d653f..6cd06ea4 100644 --- 
a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/McoMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoMedicalActsSuite.scala @@ -1,10 +1,10 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ @@ -15,7 +15,7 @@ class McoMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ccamCodes = BaseExtractorCodes(List("AAAA123")) + val ccamCodes = SimpleExtractorCodes(List("AAAA123")) val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val expected = Seq[Event[MedicalAct]]( McoCCAMAct("Patient_02", "10000123_10000987_2006", "AAAA123", makeTS(2005, 12, 31)), @@ -48,7 +48,7 @@ class McoMedicalActsSuite extends SharedContext { val input = Sources(mco = Some(mco)) // When - val result = McoCcamActExtractor(BaseExtractorCodes.empty).extract(input) + val result = McoCcamActExtractor(SimpleExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfigSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/MedicalActsConfigSuite.scala similarity index 83% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfigSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/MedicalActsConfigSuite.scala index 7c5cf1a4..e0f49b2a 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/MedicalActsConfigSuite.scala +++ 
b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/MedicalActsConfigSuite.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import org.scalatest.matchers.should.Matchers.{a, convertToAnyShouldWrapper} import fr.polytechnique.cmap.cnam.SharedContext diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrCEMedicalActsSuite.scala similarity index 85% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrCEMedicalActsSuite.scala index 70543a11..70e3525c 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrCEMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrCEMedicalActsSuite.scala @@ -1,13 +1,13 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{DateType, StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, MedicalAct, SsrCEAct} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.ssrce.SsrCeSource +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.ssrce.SsrCeSource import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -24,7 +24,7 @@ class SsrCEMedicalActsSuite extends SharedContext { "isInStudy" should "return true when a study code is found in the 
row" in { // Given - val codes = BaseExtractorCodes(List("AAAA", "BBBB")) + val codes = SimpleExtractorCodes(List("AAAA", "BBBB")) val inputArray = Array[Any]("Patient_A", "AAAA", makeTS(2010, 1, 1)) val inputRow = new GenericRowWithSchema(inputArray, schema) @@ -38,7 +38,7 @@ class SsrCEMedicalActsSuite extends SharedContext { it should "return false when no code is found in the row" in { // Given - val codes = BaseExtractorCodes(List("AAAA", "BBBB")) + val codes = SimpleExtractorCodes(List("AAAA", "BBBB")) val inputArray = Array[Any]("Patient_A", "CCCC", makeTS(2010, 1, 1)) val inputRow = new GenericRowWithSchema(inputArray, schema) @@ -55,7 +55,7 @@ class SsrCEMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val codes = BaseExtractorCodes(List("AAAA", "CCCC")) + val codes = SimpleExtractorCodes(List("AAAA", "CCCC")) val input = Seq( ("Patient_A", "AAAA", makeTS(2010, 1, 1)), diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrMedicalActsSuite.scala similarity index 87% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrMedicalActsSuite.scala index 2c38dd60..ba0dd19f 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/acts/SsrMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/SsrMedicalActsSuite.scala @@ -1,8 +1,8 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.acts +package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import 
fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ @@ -13,7 +13,7 @@ class SsrMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ccamCodes = BaseExtractorCodes(List("AHQP001")) + val ccamCodes = SimpleExtractorCodes(List("AHQP001")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[MedicalAct]]( SsrCCAMAct("Patient_02", "10000123_30000546_200_2019", "AHQP001", makeTS(2019, 8, 11)), @@ -43,7 +43,7 @@ class SsrMedicalActsSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrCcamActExtractor(BaseExtractorCodes.empty).extract(input) + val result = SsrCcamActExtractor(SimpleExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) @@ -54,7 +54,7 @@ class SsrMedicalActsSuite extends SharedContext { import sqlCtx.implicits._ // Given - val ccarrCodes = BaseExtractorCodes(List("BLR+156")) + val ccarrCodes = SimpleExtractorCodes(List("BLR+156")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[MedicalAct]]( SsrCSARRAct("Patient_02", "10000123_30000546_200_2019", "BLR+156", makeTS(2019, 8, 11)), @@ -84,7 +84,7 @@ class SsrMedicalActsSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrCsarrActExtractor(BaseExtractorCodes.empty).extract(input) + val result = SsrCsarrActExtractor(SimpleExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GHMClassificationsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/classifications/GHMClassificationsSuite.scala similarity index 87% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GHMClassificationsSuite.scala rename to 
src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/classifications/GHMClassificationsSuite.scala index 4f530024..007555d1 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/classifications/GHMClassificationsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/classifications/GHMClassificationsSuite.scala @@ -1,10 +1,10 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.classifications +package fr.polytechnique.cmap.cnam.etl.extractors.events.classifications import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.GHMClassification -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ @@ -16,7 +16,7 @@ class GHMClassificationsSuite extends SharedContext { // Given val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") - val ghmCodes = BaseExtractorCodes(List("12H50L")) + val ghmCodes = SimpleExtractorCodes(List("12H50L")) val expected = Seq( GHMClassification("Patient_02", "10000123_20000123_2007", "12H50L", makeTS(2007, 1, 29)), @@ -51,7 +51,7 @@ class GHMClassificationsSuite extends SharedContext { val sources = Sources(mco = Some(mco)) // When - val result = GhmExtractor(BaseExtractorCodes.empty).extract(sources) + val result = GhmExtractor(SimpleExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosesSuite.scala similarity index 87% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosesSuite.scala rename to 
src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosesSuite.scala index 4fd73886..bf97c9b0 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/HadDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosesSuite.scala @@ -1,8 +1,8 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event, HadAssociatedDiagnosis, HadMainDiagnosis} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -13,7 +13,7 @@ class HadDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val dpCodes = BaseExtractorCodes(List("G970")) + val dpCodes = SimpleExtractorCodes(List("G970")) val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") val sources = Sources(had = Some(had)) @@ -44,7 +44,7 @@ class HadDiagnosesSuite extends SharedContext { ).toDS // When - val result = HadMainDiagnosisExtractor(BaseExtractorCodes.empty).extract(sources) + val result = HadMainDiagnosisExtractor(SimpleExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) @@ -55,7 +55,7 @@ class HadDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val associatedDiagnosis = BaseExtractorCodes(List("G9")) + val associatedDiagnosis = SimpleExtractorCodes(List("G9")) val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") val sources = Sources(had = Some(had)) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala 
b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/ImbDiagnosesSuite.scala similarity index 86% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/ImbDiagnosesSuite.scala index e59b2757..c2898c85 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/ImbDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/ImbDiagnosesSuite.scala @@ -1,10 +1,10 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.ImbCcamDiagnosis -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -20,7 +20,7 @@ class ImbDiagnosesSuite extends SharedContext { val sources = Sources(irImb = Some(imb)) // When - val output = ImbCimDiagnosisExtractor(BaseExtractorCodes(List("C67"))).extract(sources) + val output = ImbCimDiagnosisExtractor(SimpleExtractorCodes(List("C67"))).extract(sources) // Then assertDSs(expected, output) @@ -40,7 +40,7 @@ class ImbDiagnosesSuite extends SharedContext { val sources = Sources(irImb = Some(imb)) // When - val output = ImbCimDiagnosisExtractor(BaseExtractorCodes.empty).extract(sources) + val output = ImbCimDiagnosisExtractor(SimpleExtractorCodes.empty).extract(sources) // Then assertDSs(expected, output) @@ -68,7 +68,7 @@ class ImbDiagnosesSuite extends SharedContext { val sources = Sources(irImb = Some(imb)) // When - val output = ImbCimDiagnosisExtractor(BaseExtractorCodes.empty).extract(sources) + val output = 
ImbCimDiagnosisExtractor(SimpleExtractorCodes.empty).extract(sources) // Then assertDSs(expected, output) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosesSuite.scala similarity index 89% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosesSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosesSuite.scala index bf5103f5..168806a7 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/McoDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosesSuite.scala @@ -1,10 +1,10 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event, McoAssociatedDiagnosis, McoLinkedDiagnosis, McoMainDiagnosis} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -15,7 +15,7 @@ class McoDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val dpCodes = BaseExtractorCodes(List("C67")) + val dpCodes = SimpleExtractorCodes(List("C67")) val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val sources = Sources(mco = Some(mco)) @@ -41,7 +41,7 @@ class McoDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val linkedCodes = BaseExtractorCodes(List("E05", "E08")) + val linkedCodes = SimpleExtractorCodes(List("E05", "E08")) val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val sources = 
Sources(mco = Some(mco)) @@ -62,7 +62,7 @@ class McoDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val associatedDiagnosis = BaseExtractorCodes(List("C66")) + val associatedDiagnosis = SimpleExtractorCodes(List("C66")) val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") val sources = Sources(mco = Some(mco)) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosesSuite.scala similarity index 88% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosesSuite.scala index bc520a3e..dbe9c8a8 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/diagnoses/SsrDiagnosesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosesSuite.scala @@ -1,8 +1,8 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.diagnoses +package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, Event, SsrAssociatedDiagnosis, SsrLinkedDiagnosis, SsrMainDiagnosis, SsrTakingOverPurpose} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -13,7 +13,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val dpCodes = BaseExtractorCodes(List("C66")) + val dpCodes = SimpleExtractorCodes(List("C66")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) @@ -47,7 +47,7 @@ class SsrDiagnosesSuite extends 
SharedContext { // When - val result = SsrMainDiagnosisExtractor(BaseExtractorCodes.empty).extract(sources) + val result = SsrMainDiagnosisExtractor(SimpleExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) @@ -58,7 +58,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val linkedCodes = BaseExtractorCodes(List("C6")) + val linkedCodes = SimpleExtractorCodes(List("C6")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) @@ -80,7 +80,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val associatedDiagnosis = BaseExtractorCodes(List("C6")) + val associatedDiagnosis = SimpleExtractorCodes(List("C6")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val sources = Sources(ssr = Some(ssr)) @@ -101,7 +101,7 @@ class SsrDiagnosesSuite extends SharedContext { import sqlCtx.implicits._ // Given - val cim10Codes = BaseExtractorCodes(List("Z100")) + val cim10Codes = SimpleExtractorCodes(List("Z100")) val ssr = spark.read.parquet("src/test/resources/test-joined/SSR.parquet") val expected = Seq[Event[Diagnosis]]( SsrTakingOverPurpose("Patient_02", "10000123_30000546_300_2019", "Z100", makeTS(2019, 8, 11)) @@ -129,7 +129,7 @@ class SsrDiagnosesSuite extends SharedContext { val input = Sources(ssr = Some(ssr)) // When - val result = SsrTakingOverPurposeExtractor(BaseExtractorCodes.empty).extract(input) + val result = SsrTakingOverPurposeExtractor(SimpleExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugsExtractorSuite.scala similarity index 98% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala rename to 
src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugsExtractorSuite.scala index d5c466e5..96a47716 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/DrugsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugsExtractorSuite.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema @@ -8,9 +8,9 @@ import org.apache.spark.sql.functions.lit import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification._ -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.{Antidepresseurs, Antihypertenseurs, Hypnotiques, Neuroleptiques} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.{Cip13Level, MoleculeCombinationLevel, PharmacologicalLevel, TherapeuticLevel} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification._ +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.{Antidepresseurs, Antihypertenseurs, Hypnotiques, Neuroleptiques} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.{Cip13Level, MoleculeCombinationLevel, PharmacologicalLevel, TherapeuticLevel} import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/PharmacologicalClassConfigSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/PharmacologicalClassConfigSuite.scala similarity index 92% rename from 
src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/PharmacologicalClassConfigSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/PharmacologicalClassConfigSuite.scala index 73159c6b..07dc5a81 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/PharmacologicalClassConfigSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/PharmacologicalClassConfigSuite.scala @@ -1,10 +1,10 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.PharmacologicalClassConfig -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.families.Antidepresseurs +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.PharmacologicalClassConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.Antidepresseurs class PharmacologicalClassConfigSuite extends SharedContext { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/Cip13LevelSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/Cip13LevelSuite.scala similarity index 90% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/Cip13LevelSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/Cip13LevelSuite.scala index a83ee3e6..15922c41 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/Cip13LevelSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/Cip13LevelSuite.scala @@ -1,12 +1,12 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import 
org.mockito.Mockito import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} class Cip13LevelSuite extends SharedContext { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/DrugClassficationLevelSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/DrugClassficationLevelSuite.scala similarity index 90% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/DrugClassficationLevelSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/DrugClassficationLevelSuite.scala index d50dc0a3..5e4196e1 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/DrugClassficationLevelSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/DrugClassficationLevelSuite.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.scalatest.matchers.should.Matchers.{a, convertToAnyShouldWrapper} import fr.polytechnique.cmap.cnam.SharedContext diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/MoleculeCombinationSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/MoleculeCombinationSuite.scala similarity index 90% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/MoleculeCombinationSuite.scala rename to 
src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/MoleculeCombinationSuite.scala index 70c8c1e7..e5f80f4a 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/MoleculeCombinationSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/MoleculeCombinationSuite.scala @@ -1,12 +1,12 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.mockito.Mockito import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} class MoleculeCombinationSuite extends SharedContext { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/PharmacologicalSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/PharmacologicalSuite.scala similarity index 92% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/PharmacologicalSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/PharmacologicalSuite.scala index fdcc8a64..7b407607 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/PharmacologicalSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/PharmacologicalSuite.scala @@ -1,11 +1,11 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import 
org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} class PharmacologicalSuite extends SharedContext { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/TherapeuticSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/TherapeuticSuite.scala similarity index 92% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/TherapeuticSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/TherapeuticSuite.scala index 311d8793..9ad52ff5 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/drugs/level/TherapeuticSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/level/TherapeuticSuite.scala @@ -1,12 +1,12 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.drugs.level +package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level import org.mockito.Mockito import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.{DrugClassConfig, PharmacologicalClassConfig} class TherapeuticSuite extends SharedContext { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStayExtractorSuite.scala 
similarity index 96% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStayExtractorSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStayExtractorSuite.scala index aa19bfef..d258e695 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/HadHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStayExtractorSuite.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStayExtractorSuite.scala similarity index 98% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStayExtractorSuite.scala index f97fece3..bc22528b 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStayExtractorSuite.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala 
b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoceEmergenciesExtractorSuite.scala similarity index 97% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoceEmergenciesExtractorSuite.scala index 765f9593..7e33563a 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/McoceEmergenciesExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoceEmergenciesExtractorSuite.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.McoceEmergency diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SSrHospitalStayExtractorSuite.scala similarity index 96% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SSrHospitalStayExtractorSuite.scala index e7875f3a..0e49f767 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/hospitalstays/SSrHospitalStayExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SSrHospitalStayExtractorSuite.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays +package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext diff --git 
a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchasesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchasesSuite.scala similarity index 99% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchasesSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchasesSuite.scala index c5087123..8a7c0172 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/DcirMoleculePurchasesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchasesSuite.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.molecules +package fr.polytechnique.cmap.cnam.etl.extractors.events.molecules import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchasesSuite.scala similarity index 94% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchasesSuite.scala index ab8ab5fb..b10fa674 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/molecules/MoleculePurchasesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/MoleculePurchasesSuite.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.molecules +package fr.polytechnique.cmap.cnam.etl.extractors.events.molecules import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ diff --git 
a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActsExtractorSuite.scala similarity index 97% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActsExtractorSuite.scala index 976c202d..9e2d116b 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/DcirNgapActsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActsExtractorSuite.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts +package fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoNgapActsExtractorSuite.scala similarity index 98% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoNgapActsExtractorSuite.scala index 624ebb38..ef243a2b 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ngapacts/McoNgapActsExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoNgapActsExtractorSuite.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.ngapacts +package fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext diff --git 
a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityExtractorSuite.scala similarity index 88% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityExtractorSuite.scala index e3097194..fc12ddac 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/prestations/PractitionerClaimSpecialityExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/prestations/PractitionerClaimSpecialityExtractorSuite.scala @@ -1,12 +1,12 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.prestations +package fr.polytechnique.cmap.cnam.etl.extractors.events.prestations import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{StringType, StructField, StructType} import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCeFbstcMedicalPractitionerClaim, McoCeFcstcMedicalPractitionerClaim, MedicalPractitionerClaim, NonMedicalPractitionerClaim, PractitionerClaimSpeciality} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -18,7 +18,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val medicalSpeCodes = BaseExtractorCodes(List("42")) + val medicalSpeCodes = SimpleExtractorCodes(List("42")) val input = spark.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") val sources = Sources(dcir = 
Some(input)) @@ -43,7 +43,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val nonMedicalSpeCodes = BaseExtractorCodes(List("42")) + val nonMedicalSpeCodes = SimpleExtractorCodes(List("42")) val input = spark.read.parquet("src/test/resources/test-input/DCIR_w_BIO.parquet") val sources = Sources(dcir = Some(input)) @@ -74,7 +74,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { val expected = "A10000001" // When - val result = NonMedicalPractitionerClaimExtractor(BaseExtractorCodes.empty).extractGroupId(row) + val result = NonMedicalPractitionerClaimExtractor(SimpleExtractorCodes.empty).extractGroupId(row) // Then assert(result == expected) @@ -98,7 +98,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = MedicalPractitionerClaimExtractor(BaseExtractorCodes.empty).extract(sources) + val result = MedicalPractitionerClaimExtractor(SimpleExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) @@ -110,7 +110,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { import sqlCtx.implicits._ // Given - val medicalSpeCodes = BaseExtractorCodes(List("1")) + val medicalSpeCodes = SimpleExtractorCodes(List("1")) val input = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") val sources = Sources(mcoCe = Some(input)) @@ -142,7 +142,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = McoCeFbstcSpecialtyExtractor(BaseExtractorCodes.empty).extract(sources) + val result = McoCeFbstcSpecialtyExtractor(SimpleExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) @@ -166,7 +166,7 @@ class PractitionerClaimSpecialityExtractorSuite extends SharedContext { // When - val result = McoCeFcstcSpecialtyExtractor(BaseExtractorCodes.empty).extract(sources) + val result = 
McoCeFcstcSpecialtyExtractor(SimpleExtractorCodes.empty).extract(sources) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOveReasonSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOveReasonSuite.scala similarity index 85% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOveReasonSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOveReasonSuite.scala index 8cadd95a..dc3dded6 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/takeOverReasons/HadTakeOveReasonSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOveReasonSuite.scala @@ -1,8 +1,8 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.takeOverReasons +package fr.polytechnique.cmap.cnam.etl.extractors.events.takeoverreasons import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, HadAssociatedTakeOver, HadMainTakeOver, MedicalTakeOverReason} -import fr.polytechnique.cmap.cnam.etl.extractors.BaseExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ @@ -13,7 +13,7 @@ class HadTakeOveReasonSuite extends SharedContext { import sqlCtx.implicits._ // Given - val takeOverReasonCodes = BaseExtractorCodes(List("1")) + val takeOverReasonCodes = SimpleExtractorCodes(List("1")) val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") val expected = Seq[Event[MedicalTakeOverReason]]( HadMainTakeOver("patient01", "10000123_30000123_2019", "1", makeTS(2019, 11, 21)) @@ -41,7 +41,7 @@ class HadTakeOveReasonSuite extends SharedContext { val input = Sources(had = Some(had)) // When - val result = 
HadMainTakeOverExtractor(BaseExtractorCodes.empty).extract(input) + val result = HadMainTakeOverExtractor(SimpleExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) @@ -61,7 +61,7 @@ class HadTakeOveReasonSuite extends SharedContext { val input = Sources(had = Some(had)) // When - val result = HadAssociatedTakeOverExtractor(BaseExtractorCodes.empty).extract(input) + val result = HadAssociatedTakeOverExtractor(SimpleExtractorCodes.empty).extract(input) // Then assertDSs(result, expected) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractorSuite.scala new file mode 100644 index 00000000..77f9a0ce --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractorSuite.scala @@ -0,0 +1,97 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir + +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types.{DateType, StringType, StructField, StructType} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class DcirRowExtractorSuite extends SharedContext { + + object MockDcirRowExtractor extends DcirRowExtractor + + "extractGroupId" should "return the groupID" in { + // Given + val schema = StructType( + Seq( + StructField("FLX_DIS_DTD", StringType), + StructField("FLX_TRT_DTD", StringType), + StructField("FLX_EMT_TYP", StringType), + StructField("FLX_EMT_NUM", StringType), + StructField("FLX_EMT_ORD", StringType), + StructField("ORG_CLE_NUM", StringType), + StructField("DCT_ORD_NUM", StringType) + ) + ) + + val values = Array[Any]("2014-08-01", "2014-07-17", "1", "17", "0", "01C673000", "1749") + val r = new GenericRowWithSchema(values, schema) + val expected = "MjAxNC0wOC0wMV8yMDE0LTA3LTE3XzFfMTdfMF8wMUM2NzMwMDBfMTc0OQ==" + + 
// When + val result = MockDcirRowExtractor.extractGroupId(r) + + // Then + assert(result == expected) + } + + "extractPatientId" should "return the patientId" in { + // Given + val schema = StructType( + Seq( + StructField("NUM_ENQ", StringType) + ) + ) + + val values = Array[Any]("Patient") + val r = new GenericRowWithSchema(values, schema) + val expected = "Patient" + + // When + val result = MockDcirRowExtractor.extractPatientId(r) + + // Then + assert(result == expected) + } + + "extractStart" should "return the start date" in { + // Given + val schema = StructType( + Seq( + StructField("EXE_SOI_DTD", DateType), + StructField("FLX_DIS_DTD", DateType) + ) + ) + + val values = Array[Any](makeTS(2014, 8, 1), makeTS(2014, 7, 17)) + val r = new GenericRowWithSchema(values, schema) + val expected = makeTS(2014, 8, 1) + + // When + val result = MockDcirRowExtractor.extractStart(r) + + // Then + assert(result == expected) + } + + it should "return the flow date when the start date is null" in { + // Given + val schema = StructType( + Seq( + StructField("EXE_SOI_DTD", DateType), + StructField("FLX_DIS_DTD", DateType) + ) + ) + + val values = Array[Any](null, makeTS(2014, 7, 1)) + val r = new GenericRowWithSchema(values, schema) + val expected = makeTS(2014, 7, 1) + + // When + val result = MockDcirRowExtractor.extractStart(r) + + // Then + assert(result == expected) + } +} \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractorSuite.scala new file mode 100644 index 00000000..dd6bb99b --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractorSuite.scala @@ -0,0 +1,72 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.sources.had + +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import 
org.apache.spark.sql.types.{DateType, IntegerType, StringType, StructField, StructType} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class HadRowExtractorSuite extends SharedContext { + + object MockHadRowExtractor extends HadRowExtractor + + "extractGroupId" should "return the groupID" in { + // Given + val schema = StructType( + Seq( + StructField("ETA_NUM_EPMSI", StringType), + StructField("RHAD_NUM", StringType), + StructField("year", IntegerType) + ) + ) + + val values = Array[Any]("A", "B", 2000) + val r = new GenericRowWithSchema(values, schema) + val expected = "A_B_2000" + + // When + val result = MockHadRowExtractor.extractGroupId(r) + + // Then + assert(result == expected) + } + + "extractPatientId" should "return the patientId" in { + // Given + val schema = StructType( + Seq( + StructField("NUM_ENQ", StringType) + ) + ) + + val values = Array[Any]("Patient") + val r = new GenericRowWithSchema(values, schema) + val expected = "Patient" + + // When + val result = MockHadRowExtractor.extractPatientId(r) + + // Then + assert(result == expected) + } + + "extractStart" should "return the start date" in { + // Given + val schema = StructType( + Seq( + StructField("estimated_start", DateType) + ) + ) + + val values = Array[Any](makeTS(2014, 8, 1)) + val r = new GenericRowWithSchema(values, schema) + val expected = makeTS(2014, 8, 1) + + // When + val result = MockHadRowExtractor.extractStart(r) + + // Then + assert(result == expected) + } +} \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSourceSuite.scala similarity index 97% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSourceSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSourceSuite.scala index cf5db8e8..e5e7cf1a 100644 
--- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/had/HadSourceSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSourceSuite.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.had +package fr.polytechnique.cmap.cnam.etl.extractors.sources.had import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractorSuite.scala new file mode 100644 index 00000000..9b5d86d1 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractorSuite.scala @@ -0,0 +1,72 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mco + +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types.{DateType, IntegerType, StringType, StructField, StructType} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class McoRowExtractorSuite extends SharedContext { + + object MockMcoRowExtractor extends McoRowExtractor + + "extractGroupId" should "return the groupID" in { + // Given + val schema = StructType( + Seq( + StructField("ETA_NUM", StringType), + StructField("RSA_NUM", StringType), + StructField("SOR_ANN", IntegerType) + ) + ) + + val values = Array[Any]("A", "B", 2000) + val r = new GenericRowWithSchema(values, schema) + val expected = "A_B_2000" + + // When + val result = MockMcoRowExtractor.extractGroupId(r) + + // Then + assert(result == expected) + } + + "extractPatientId" should "return the patientId" in { + // Given + val schema = StructType( + Seq( + StructField("NUM_ENQ", StringType) + ) + ) + + val values = Array[Any]("Patient") + val r = new GenericRowWithSchema(values, schema) + val expected = "Patient" + + // When + val 
result = MockMcoRowExtractor.extractPatientId(r) + + // Then + assert(result == expected) + } + + "extractStart" should "return the start date" in { + // Given + val schema = StructType( + Seq( + StructField("estimated_start", DateType) + ) + ) + + val values = Array[Any](makeTS(2014, 8, 1)) + val r = new GenericRowWithSchema(values, schema) + val expected = makeTS(2014, 8, 1) + + // When + val result = MockMcoRowExtractor.extractStart(r) + + // Then + assert(result == expected) + } +} \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSourceSuite.scala similarity index 97% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSourceSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSourceSuite.scala index 2580139c..8c9eafa1 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/mco/McoSourceSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSourceSuite.scala @@ -1,6 +1,6 @@ // License: BSD 3 clause -package fr.polytechnique.cmap.cnam.etl.extractors.mco +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mco import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractorSuite.scala new file mode 100644 index 00000000..156f496e --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractorSuite.scala @@ -0,0 +1,72 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce + +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types.{DateType, IntegerType, 
StringType, StructField, StructType} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class McoCeRowExtractorSuite extends SharedContext { + + object MockMcoCeRowExtractor extends McoCeRowExtractor + + "extractGroupId" should "return the groupID" in { + // Given + val schema = StructType( + Seq( + StructField("ETA_NUM", StringType), + StructField("SEQ_NUM", StringType), + StructField("year", IntegerType) + ) + ) + + val values = Array[Any]("A", "B", 2000) + val r = new GenericRowWithSchema(values, schema) + val expected = "A_B_2000" + + // When + val result = MockMcoCeRowExtractor.extractGroupId(r) + + // Then + assert(result == expected) + } + + "extractPatientId" should "return the patientId" in { + // Given + val schema = StructType( + Seq( + StructField("NUM_ENQ", StringType) + ) + ) + + val values = Array[Any]("Patient") + val r = new GenericRowWithSchema(values, schema) + val expected = "Patient" + + // When + val result = MockMcoCeRowExtractor.extractPatientId(r) + + // Then + assert(result == expected) + } + + "extractStart" should "return the start date" in { + // Given + val schema = StructType( + Seq( + StructField("EXE_SOI_DTD", DateType) + ) + ) + + val values = Array[Any](makeTS(2014, 8, 1)) + val r = new GenericRowWithSchema(values, schema) + val expected = makeTS(2014, 8, 1) + + // When + val result = MockMcoCeRowExtractor.extractStart(r) + + // Then + assert(result == expected) + } +} \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractorSuite.scala new file mode 100644 index 00000000..2042f5f1 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractorSuite.scala @@ -0,0 +1,73 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr + +import 
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types.{DateType, IntegerType, StringType, StructField, StructType} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class SsrRorwExtractorSuite extends SharedContext { + + object MockSsrRowExtractor extends SsrRowExtractor + + "extractGroupId" should "return the groupID" in { + // Given + val schema = StructType( + Seq( + StructField("ETA_NUM", StringType), + StructField("RHA_NUM", StringType), + StructField("RHS_NUM", StringType), + StructField("year", IntegerType) + ) + ) + + val values = Array[Any]("A", "B", "C", 2000) + val r = new GenericRowWithSchema(values, schema) + val expected = "A_B_C_2000" + + // When + val result = MockSsrRowExtractor.extractGroupId(r) + + // Then + assert(result == expected) + } + + "extractPatientId" should "return the patientId" in { + // Given + val schema = StructType( + Seq( + StructField("NUM_ENQ", StringType) + ) + ) + + val values = Array[Any]("Patient") + val r = new GenericRowWithSchema(values, schema) + val expected = "Patient" + + // When + val result = MockSsrRowExtractor.extractPatientId(r) + + // Then + assert(result == expected) + } + + "extractStart" should "return the start date" in { + // Given + val schema = StructType( + Seq( + StructField("estimated_start", DateType) + ) + ) + + val values = Array[Any](makeTS(2014, 8, 1)) + val r = new GenericRowWithSchema(values, schema) + val expected = makeTS(2014, 8, 1) + + // When + val result = MockSsrRowExtractor.extractStart(r) + + // Then + assert(result == expected) + } +} \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSourceSuite.scala similarity index 96% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala rename to 
src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSourceSuite.scala index 2ddfecab..a0104350 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/ssr/SsrSourceSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSourceSuite.scala @@ -1,4 +1,4 @@ -package fr.polytechnique.cmap.cnam.etl.extractors.ssr +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractorSuite.scala new file mode 100644 index 00000000..67d65137 --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractorSuite.scala @@ -0,0 +1,51 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssrce + +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.types.{DateType, StringType, StructField, StructType} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class SsrCeRowExtractorSuite extends SharedContext { + + object MockSsrCeRowExtractor extends SsrCeRowExtractor + + "extractPatientId" should "return the patientId" in { + // Given + val schema = StructType( + Seq( + StructField("NUM_ENQ", StringType) + ) + ) + + val values = Array[Any]("Patient") + val r = new GenericRowWithSchema(values, schema) + val expected = "Patient" + + // When + val result = MockSsrCeRowExtractor.extractPatientId(r) + + // Then + assert(result == expected) + } + + "extractStart" should "return the start date" in { + // Given + val schema = StructType( + Seq( + StructField("EXE_SOI_DTD", DateType) + ) + ) + + val values = Array[Any](makeTS(2014, 8, 1)) + val r = new 
GenericRowWithSchema(values, schema) + val expected = makeTS(2014, 8, 1) + + // When + val result = MockSsrCeRowExtractor.extractStart(r) + + // Then + assert(result == expected) + } +} \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala index 4f22ac99..19979cc5 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala @@ -10,7 +10,7 @@ import me.danielpes.spark.datetime.implicits._ import org.scalatest.flatspec.AnyFlatSpec import fr.polytechnique.cmap.cnam.etl.config.BaseConfig import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig.{InputPaths, OutputPaths} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.PharmacologicalLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.PharmacologicalLevel import fr.polytechnique.cmap.cnam.etl.transformers.exposures.{LatestPurchaseBased, LimitedExposureAdder} class FallConfigSuite extends AnyFlatSpec { From e09d2ac2a9ba61408b2d4378cb0a3b3723be306e Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Wed, 1 Apr 2020 17:06:09 +0200 Subject: [PATCH 28/38] CNAM-452: Documentation. 
--- .../cnam/etl/extractors/SimpleExtractor.scala | 4 ++++ .../events/acts/DcirMedicalActExtractor.scala | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala index 5dbd6378..f277a6d7 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala @@ -6,6 +6,10 @@ import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +/** + * This a trait that should + * @tparam EventType + */ trait SimpleExtractor[EventType <: AnyEvent] extends Extractor[EventType, SimpleExtractorCodes] { self: EventRowExtractor => diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala index 5c9237e0..7e2bd4f0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala @@ -11,6 +11,14 @@ import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirSimpleExtractor import fr.polytechnique.cmap.cnam.util.functions.makeTS +/** + * Gets all types of Acts from DCIR. + * + * The main addition of this class is the groupId method that allows to get the + * source of the act: Liberal, PublicAmbulatory, PrivateAmbulatory, Unknown and when + * the information is not available a default DCIRAct. + * @param codes: List of Act codes to be tracked in the study or empty to get all the Acts. 
+ */ abstract sealed class DcirRowActExtractor(codes: SimpleExtractorCodes) extends DcirSimpleExtractor[MedicalAct] with StartsWithStrategy[MedicalAct] { @@ -69,6 +77,10 @@ abstract sealed class DcirRowActExtractor(codes: SimpleExtractorCodes) extends D private def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) } +/** + * Get the CCAM coded acts from the DCIR. + * @param codes: List of Act codes to be tracked in the study or empty to get all the Acts. + */ final case class DcirMedicalActExtractor(codes: SimpleExtractorCodes) extends DcirRowActExtractor(codes) { // Implementation of the BasicExtractor Trait @@ -76,6 +88,10 @@ final case class DcirMedicalActExtractor(codes: SimpleExtractorCodes) override val eventBuilder: EventBuilder = DcirAct } +/** + * Get the biology acts from the DCIR. + * @param codes: List of Act codes to be tracked in the study or empty to get all the Acts. + */ final case class DcirBiologyActExtractor(codes: SimpleExtractorCodes) extends DcirRowActExtractor(codes) { // Implementation of the BasicExtractor Trait From fedb6e02c364bc63dba01ae884d6f5143a86ac8e Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Fri, 3 Apr 2020 10:36:02 +0200 Subject: [PATCH 29/38] CNAM-452: Add documentation. 
--- .../cnam/etl/extractors/ColumnNames.scala | 1 - .../etl/extractors/EventRowExtractor.scala | 5 +++ .../cmap/cnam/etl/extractors/Extractor.scala | 2 +- .../cnam/etl/extractors/SimpleExtractor.scala | 36 +++++++++++++++---- .../sources/dcir/DcirRowExtractor.scala | 4 ++- .../sources/had/HadRowExtractor.scala | 3 ++ .../sources/imb/ImbRowExtractor.scala | 31 ++++++++-------- .../sources/mco/McoRowExtractor.scala | 3 ++ .../sources/mcoce/McoCeRowExtractor.scala | 3 ++ .../sources/ssr/SsrRowExtractor.scala | 3 ++ .../sources/ssrce/SsrCeRowExtractor.scala | 3 ++ 11 files changed, 70 insertions(+), 24 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ColumnNames.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ColumnNames.scala index 37769629..a70e8233 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ColumnNames.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/ColumnNames.scala @@ -12,5 +12,4 @@ trait ColumnNames { implicit class RichColName(colName: ColName) { def toCol: Column = col(colName) } - } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala index 5e0f64fd..d67a8105 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/EventRowExtractor.scala @@ -5,6 +5,11 @@ package fr.polytechnique.cmap.cnam.etl.extractors import java.sql.Timestamp import org.apache.spark.sql.Row +/** + * Trait to be implemented to get all the information of fields. This is usually implemented for simple Extractors. + * + * Provides default implementations for non groupId, value, weight and end. 
+ */ trait EventRowExtractor { self: ColumnNames => diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala index ae3a357e..385c6658 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/Extractor.scala @@ -37,7 +37,7 @@ trait Extractor[EventType <: AnyEvent, +Codes <: ExtractorCodes] extends Seriali /** Gets and prepares all the needed columns from the Sources. * * @param sources Source object [[Sources]] that contains all sources. - * @return A dataframe with needed columns. + * @return A [[DataFrame]] with needed columns. */ def getInput(sources: Sources): DataFrame diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala index f277a6d7..9db7a9ad 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/SimpleExtractor.scala @@ -7,16 +7,36 @@ import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes /** - * This a trait that should - * @tparam EventType - */ + * Default Extractor implementation when the Extraction is simple. + * + * A simple Extractor is defined with the following characteristics: + * 1. The passed codes are of the type [[SimpleExtractorCodes]]. + * 2. Every field of an [[Event]] is mapped simply by implementing [[EventRowExtractor]]. + * 3. The `inStudy` method of [[Extractor]] is implemented through [[InStudyStrategy]] implementation. + * + * This trait has self type of [[EventRowExtractor]]. Thus every implementation must be a type [[EventRowExtractor]]. + * + * This trait defines two abstract methods: + * 1. 
[[EventBuilder]]: Factory that produces [[Event]] of type [[EventType]]. + * 2. columnName: name of the column that produces the value field of the [[Event]]. + * + * @tparam EventType Type of the [[Event]]. + */ trait SimpleExtractor[EventType <: AnyEvent] extends Extractor[EventType, SimpleExtractorCodes] { self: EventRowExtractor => + // Abstract methods of this trait def columnName: String + def eventBuilder: EventBuilder + + // used in the getInput method to select the columns. def neededColumns: List[String] = columnName :: self.usedColumns + // Unique method implementation of this trait of EventRowExtractor + override def extractValue(row: Row): String = row.getAs[String](columnName) + + // Implementation of the Extractor trait def isInExtractorScope(row: Row): Boolean = !row.isNullAt(row.fieldIndex(columnName)) def builder(row: Row): Seq[Event[EventType]] = { @@ -29,12 +49,14 @@ trait SimpleExtractor[EventType <: AnyEvent] extends Extractor[EventType, Simple Seq(eventBuilder[EventType](patientId, groupId, value, weight, eventDate, endDate)) } - - override def extractValue(row: Row): String = row.getAs[String](columnName) } +/** + * Defines the "inStudy" method of [[SimpleExtractor]]. + * @tparam EventType Type of Event to be extracted in the self typed [[SimpleExtractor]]. 
+ */ sealed trait InStudyStrategy[EventType <: AnyEvent] { - self: SimpleExtractor[EventType]=> + self: SimpleExtractor[EventType] => override def isInStudy(row: Row): Boolean } @@ -50,5 +72,5 @@ trait IsInStrategy[EventType <: AnyEvent] extends InStudyStrategy[EventType] { trait StartsWithStrategy[EventType <: AnyEvent] extends InStudyStrategy[EventType] { self: SimpleExtractor[EventType] => - def isInStudy(row: Row): Boolean = getCodes.exists(extractValue(row).startsWith) + def isInStudy(row: Row): Boolean = getCodes.exists(extractValue(row).startsWith) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractor.scala index 2dcb3db2..145300fb 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirRowExtractor.scala @@ -9,7 +9,9 @@ import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor import fr.polytechnique.cmap.cnam.util.datetime.implicits._ - +/** + * Gets the following fields for DCIR sourced events: patientID, start, groupId. 
+ */ trait DcirRowExtractor extends DcirSource with EventRowExtractor { override def usedColumns: List[ColName] = List( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala index d504b9fe..50257691 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala @@ -4,6 +4,9 @@ import java.sql.Timestamp import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor +/** + * Gets the following fields for HAD sourced events: patientID, start, groupId. + */ trait HadRowExtractor extends HadSource with EventRowExtractor { override def usedColumns: List[String] = List( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbRowExtractor.scala index 105dea5e..cc0d5af8 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/imb/ImbRowExtractor.scala @@ -9,13 +9,16 @@ import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor import fr.polytechnique.cmap.cnam.util.datetime import fr.polytechnique.cmap.cnam.util.functions.makeTS -/** IR_IMB_R contains the Chronic Diseases diagnoses (ALD = Affection Longue Duree) for patients once - * they have been exonerated for all cares related to this Chronic Disease. - * It is the medical service of the health insurance that grants this ALD on the proposal of the - * patient's main physician (Medecin Traitant). 
- * See the [online snds documentation for further details](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#le-dispositif-des-ald) - * - */ +/** + * Gets the following fields for IMB sourced events: patientID, start, end, groupId. + * + * IR_IMB_R contains the Chronic Diseases Diagnoses or `ALD = Affection Longue Durée` for patients once + * they have been exonerated for all cares related to this chronic disease. + * It is the medical service of the health insurance that grants this ALD on the proposal of the + * patient's GP (Médecin Traitant). + * See the [online snds documentation for further details] + * (https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#le-dispositif-des-ald) + */ trait ImbRowExtractor extends ImbSource with EventRowExtractor { def extractEncoding(row: Row): String = row.getAs[String](ColNames.Encoding) @@ -29,13 +32,13 @@ trait ImbRowExtractor extends ImbSource with EventRowExtractor { } /** - * The End date of the ALD is not always written. It can takes the value 1600-01-01 which - * corresponds to a None value (not set) that we convert to None. - * See the CNAM documentation [available here](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#annexe) - * - * @param r - * @return - */ + * The End date of the ALD is not always written. It can take the value 1600-01-01 which + * corresponds to a None value (not set) that we convert to None. 
+ * See the CNAM documentation [available here](https://documentation-snds.health-data-hub.fr/fiches/beneficiaires_ald.html#annexe) + * + * @param r + * @return + */ override def extractEnd(r: Row): Option[Timestamp] = { import datetime.implicits._ Try( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractor.scala index 6cfaae64..58550be0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoRowExtractor.scala @@ -6,6 +6,9 @@ import java.sql.Timestamp import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor +/** + * Gets the following fields for MCO sourced events: patientID, start, groupId. + */ trait McoRowExtractor extends McoSource with EventRowExtractor { override def usedColumns: List[String] = ColNames.core ++ super.usedColumns diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractor.scala index 7be5b990..fd370f75 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeRowExtractor.scala @@ -7,6 +7,9 @@ import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor import fr.polytechnique.cmap.cnam.util.datetime.implicits._ +/** + * Gets the following fields for MCO_CE sourced events: patientID, start, groupId. 
+ */ trait McoCeRowExtractor extends McoCeSource with EventRowExtractor { override def usedColumns: List[String] = super.usedColumns ++ List( ColNames.PatientID, ColNames.EtaNum, diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala index 0414e254..8166b459 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala @@ -7,6 +7,9 @@ import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources +/** + * Gets the following fields for SSR sourced events: patientID, start, groupId. + */ trait SsrRowExtractor extends SsrSource with EventRowExtractor { override def usedColumns: List[String] = ColNames.core ++ super.usedColumns diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractor.scala index 0f728d57..4f0a3ee4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssrce/SsrCeRowExtractor.scala @@ -6,6 +6,9 @@ import java.sql.Timestamp import org.apache.spark.sql.Row import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor +/** + * Gets the following fields for SSR_CE sourced events: patientID and start. 
+ */ trait SsrCeRowExtractor extends SsrCeSource with EventRowExtractor { override def usedColumns: List[String] = ColNames.core ++ super.usedColumns From bc01a360a7abe440342164b451a5d2255beea5b7 Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Wed, 8 Apr 2020 10:22:02 +0200 Subject: [PATCH 30/38] CNAM-452: Address review comments. --- .../etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala | 2 ++ .../etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala | 2 ++ .../etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala | 2 ++ .../events/hospitalstays/HadHospitalStaysExtractor.scala | 2 ++ .../events/hospitalstays/McoHospitalStaysExtractor.scala | 2 ++ .../events/hospitalstays/SsrHospitalStaysExtractor.scala | 2 ++ .../events/takeoverreasons/HadTakeOverReasonExtractor.scala | 2 ++ .../cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala | 2 ++ 8 files changed, 16 insertions(+) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala index 0646ece9..6c7ab966 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/HadDiagnosisExtractor.scala @@ -1,3 +1,5 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, HadAssociatedDiagnosis, HadMainDiagnosis} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala index 6017179b..d12b614b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/McoDiagnosisExtractor.scala @@ -1,3 +1,5 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.etl.events._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala index 08abcd5c..b773bf47 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala @@ -1,3 +1,5 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, SsrAssociatedDiagnosis, SsrLinkedDiagnosis, SsrMainDiagnosis, SsrTakingOverPurpose} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStaysExtractor.scala index 99b1852e..5af686c0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/HadHospitalStaysExtractor.scala @@ -1,3 +1,5 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import java.sql.{Date, Timestamp} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStaysExtractor.scala index 3ebcd417..8870959e 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/McoHospitalStaysExtractor.scala @@ -1,3 +1,5 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import java.sql.{Date, Timestamp} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SsrHospitalStaysExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SsrHospitalStaysExtractor.scala index b1e335ea..19ac2c6b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SsrHospitalStaysExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/hospitalstays/SsrHospitalStaysExtractor.scala @@ -1,3 +1,5 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays import java.sql.{Date, Timestamp} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOverReasonExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOverReasonExtractor.scala index 95c9e0df..3ffd868d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOverReasonExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/takeoverreasons/HadTakeOverReasonExtractor.scala @@ -1,3 +1,5 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.events.takeoverreasons import org.apache.spark.sql.Row diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala index 50257691..e99771b4 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadRowExtractor.scala @@ -1,3 +1,5 @@ +// License: BSD 3 clause + package fr.polytechnique.cmap.cnam.etl.extractors.sources.had import java.sql.Timestamp From e1d17955d1462d21409856cb5c7cdd1e7ac46141 Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Wed, 8 Apr 2020 12:13:34 +0200 Subject: [PATCH 31/38] CNAM-452: Rebase with develop. --- .../ngapacts/McoCeNgapActExtractor.scala | 6 +-- .../sources/mcoce/McoCeSource.scala | 1 + .../cmap/cnam/study/bulk/BulkMain.scala | 11 ++--- .../bulk/extractors/DcirSourceExtractor.scala | 27 +++++------ .../bulk/extractors/HadSourceExtractor.scala | 25 ++++++----- .../bulk/extractors/ImbSourceExtractor.scala | 5 ++- .../extractors/McoCeSourceExtractor.scala | 33 +++++++++----- .../bulk/extractors/McoSourceExtractor.scala | 35 +++++++++++---- .../bulk/extractors/SourceExtractor.scala | 16 ++++--- .../extractors/SsrCeSourceExtractor.scala | 9 +++- .../bulk/extractors/SsrSourceExtractor.scala | 45 ++++++++++++++----- .../events/acts/McoCEMedicalActsSuite.scala | 2 +- .../extractors/DcirSourceExtractorSuite.scala | 4 +- .../extractors/SourceExtractorSuite.scala | 15 ++++--- .../SsrCeSourceExtractorSuite.scala | 6 +-- 15 files changed, 147 insertions(+), 93 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala index d2349961..4c7aa55d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala @@ -68,8 +68,6 @@ final case class McoCeFbstcNgapActExtractor(ngapConfig: NgapActConfig[NgapActCla val keyLetterColumn: String = 
ColNames.NgapKeyLetterFbstc val coeffColumn: String = ColNames.NgapCoefficientFbstc override val eventBuilder: EventBuilder = McoCeFbstcNgapAct - val ngapActsConfig: NgapActConfig = ngapConfig - val coeffColumn: String = ColNames.NgapCoefficientFbstc } final case class McoCeFcstcNgapActExtractor(ngapConfig: NgapActConfig[NgapActClassConfig]) extends McoCeNgapActExtractor(ngapConfig) { @@ -77,6 +75,4 @@ final case class McoCeFcstcNgapActExtractor(ngapConfig: NgapActConfig[NgapActCla val coeffColumn: String = ColNames.NgapCoefficientFcstc override val eventBuilder: EventBuilder = McoCeFcstcNgapAct - val ngapActsConfig: NgapActConfig = ngapConfig - val coeffColumn: String = ColNames.NgapCoefficientFcstc -} +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSource.scala index a1f73b82..b9128afd 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mcoce/McoCeSource.scala @@ -10,6 +10,7 @@ trait McoCeSource extends ColumnNames { val EtaNum: ColName = "ETA_NUM" val SeqNum: ColName = "SEQ_NUM" val CamCode = "MCO_FMSTC__CCAM_COD" + val Year = "year" // NGAP from FBSTC val NgapKeyLetterFbstc = "MCO_FBSTC__ACT_COD" diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala index af5724e6..ab929e44 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/BulkMain.scala @@ -5,17 +5,12 @@ package fr.polytechnique.cmap.cnam.study.bulk import java.io.PrintWriter import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main -import 
fr.polytechnique.cmap.cnam.etl.extractors.events.acts.{DcirMedicalActExtractor, McoCcamActExtractor, McoCeCcamActExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.events.classifications.GhmExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses._ -import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.DrugExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.study.bulk.extractors._ import fr.polytechnique.cmap.cnam.util.reporting.MainMetadata -/*object BulkMain extends Main { +object BulkMain extends Main { override def appName: String = "BulkMain" override def run( @@ -41,7 +36,7 @@ import fr.polytechnique.cmap.cnam.util.reporting.MainMetadata // Write Metadata val metadata = MainMetadata( this.getClass.getName, startTimestamp, new java.util.Date(), - sourceExtractor.map(se => se.extract(sources)).flatten ++ + sourceExtractor.flatMap(se => se.extract(sources)) ++ new PatientExtractor(bulkConfig.output.root, bulkConfig.output.saveMode, bulkConfig.base).extract(sources) ) val metadataJson: String = metadata.toJsonString() @@ -53,4 +48,4 @@ import fr.polytechnique.cmap.cnam.util.reporting.MainMetadata None } -}*/ +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractor.scala index 768cb08d..b0dcae27 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractor.scala @@ -3,10 +3,11 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.events.{Drug, MedicalAct, NgapAct, PractitionerClaimSpeciality} -import 
fr.polytechnique.cmap.cnam.etl.extractors.acts.{DcirBiologyActExtractor, DcirMedicalActExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.{DrugConfig, DrugExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.ngapacts.{DcirNgapActExtractor, NgapActConfig} -import fr.polytechnique.cmap.cnam.etl.extractors.prestations.{MedicalPractitionerClaimExtractor, NonMedicalPractitionerClaimExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.{DcirBiologyActExtractor, DcirMedicalActExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts.{DcirNgapActExtractor, NgapActConfig, NgapWithNatClassConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.prestations.{MedicalPractitionerClaimExtractor, NonMedicalPractitionerClaimExtractor} class DcirSourceExtractor( override val path: String, @@ -15,25 +16,25 @@ class DcirSourceExtractor( override val sourceName: String = "DCIR" override val extractors = List( - ExtractorSources[MedicalAct](DcirMedicalActExtractor, List("ER_PRS_F", "ER_CAM_F", "ER_ETE_F"), "DCIR_MEDICAL_ACT"), - ExtractorSources[MedicalAct]( - DcirBiologyActExtractor, + ExtractorSources[MedicalAct, SimpleExtractorCodes](DcirMedicalActExtractor(SimpleExtractorCodes.empty), List("ER_PRS_F", "ER_CAM_F", "ER_ETE_F"), "DCIR_MEDICAL_ACT"), + ExtractorSources[MedicalAct, SimpleExtractorCodes]( + DcirBiologyActExtractor(SimpleExtractorCodes.empty), List("ER_PRS_F", "ER_BIO_F", "ER_ETE_F"), "DCIR_BIOLOGICAL_ACT" ), - ExtractorSources[Drug](new DrugExtractor(drugConfig), List("ER_PRS_F", "IR_PHA_R"), "DRUG_PURCHASES"), - ExtractorSources[NgapAct]( + ExtractorSources[Drug, DrugConfig](new DrugExtractor(drugConfig), List("ER_PRS_F", "IR_PHA_R"), "DRUG_PURCHASES"), + ExtractorSources[NgapAct, NgapActConfig[NgapWithNatClassConfig]]( new 
DcirNgapActExtractor(NgapActConfig(List.empty)), List("ER_PRS_F", "IR_NAT_V", "ER_ETE_F"), "DCIR_NGAP_ACTS" ), - ExtractorSources[PractitionerClaimSpeciality]( - MedicalPractitionerClaimExtractor, + ExtractorSources[PractitionerClaimSpeciality, SimpleExtractorCodes]( + MedicalPractitionerClaimExtractor(SimpleExtractorCodes.empty), List("ER_PRS_F"), "DCIR_MEDICAL_PRACTIONNER" ), - ExtractorSources[PractitionerClaimSpeciality]( - NonMedicalPractitionerClaimExtractor, + ExtractorSources[PractitionerClaimSpeciality, SimpleExtractorCodes]( + NonMedicalPractitionerClaimExtractor(SimpleExtractorCodes.empty), List("ER_PRS_F"), "DCIR_NON_MEDICAL_PRACTIONNER" ) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala index 7f21a231..f4549823 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/HadSourceExtractor.scala @@ -3,10 +3,11 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, HospitalStay, MedicalAct, MedicalTakeOverReason} -import fr.polytechnique.cmap.cnam.etl.extractors.acts.HadCcamActExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{HadAssociatedDiagnosisExtractor, HadMainDiagnosisExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.HadHospitalStaysExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.takeOverReasons.{HadAssociatedTakeOverExtractor, HadMainTakeOverExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.HadCcamActExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.{HadAssociatedDiagnosisExtractor, HadMainDiagnosisExtractor} +import 
fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.HadHospitalStaysExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.takeoverreasons.{HadAssociatedTakeOverExtractor, HadMainTakeOverExtractor} class HadSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( path, @@ -14,20 +15,20 @@ class HadSourceExtractor(override val path: String, override val saveMode: Strin ) { override val sourceName: String = "HAD" override val extractors = List( - ExtractorSources[MedicalAct](HadCcamActExtractor, List("HAD_C", "HAD_A"), "HAD_CCAM_ACT"), - ExtractorSources[Diagnosis](HadMainDiagnosisExtractor, List("HAD_C", "HAD_B"), "HAD_MAIN_DIAGNOSIS"), - ExtractorSources[Diagnosis](HadAssociatedDiagnosisExtractor, List("HAD_C", "HAD_D"), "HAD_ASSOCIATED_DIAGNOSIS"), - ExtractorSources[MedicalTakeOverReason]( - HadMainTakeOverExtractor, + ExtractorSources[MedicalAct, SimpleExtractorCodes](HadCcamActExtractor(SimpleExtractorCodes.empty), List("HAD_C", "HAD_A"), "HAD_CCAM_ACT"), + ExtractorSources[Diagnosis, SimpleExtractorCodes](HadMainDiagnosisExtractor(SimpleExtractorCodes.empty), List("HAD_C", "HAD_B"), "HAD_MAIN_DIAGNOSIS"), + ExtractorSources[Diagnosis, SimpleExtractorCodes](HadAssociatedDiagnosisExtractor(SimpleExtractorCodes.empty), List("HAD_C", "HAD_D"), "HAD_ASSOCIATED_DIAGNOSIS"), + ExtractorSources[MedicalTakeOverReason, SimpleExtractorCodes]( + HadMainTakeOverExtractor(SimpleExtractorCodes.empty), List("HAD_C", "HAD_B"), "HAD_MAIN_TAKE_OVER_REASON" ), - ExtractorSources[MedicalTakeOverReason]( - HadAssociatedTakeOverExtractor, + ExtractorSources[MedicalTakeOverReason, SimpleExtractorCodes]( + HadAssociatedTakeOverExtractor(SimpleExtractorCodes.empty), List("HAD_C", "HAD_B"), "HAD_ASSOCIATED_TAKE_OVER_REASON" ), - ExtractorSources[HospitalStay]( + ExtractorSources[HospitalStay, SimpleExtractorCodes]( HadHospitalStaysExtractor, List("HAD_C", "HAD_B"), "HAD_STAYS" diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractor.scala index c333121e..1d1c0d22 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/ImbSourceExtractor.scala @@ -3,7 +3,8 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.events.Diagnosis -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.ImbDiagnosisExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.ImbCimDiagnosisExtractor class ImbSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( path, @@ -11,6 +12,6 @@ class ImbSourceExtractor(override val path: String, override val saveMode: Strin ) { override val sourceName: String = "IMB_R" override val extractors = List( - ExtractorSources[Diagnosis](ImbDiagnosisExtractor, List("IR_IMB_R"), "ALD") + ExtractorSources[Diagnosis, SimpleExtractorCodes](ImbCimDiagnosisExtractor(SimpleExtractorCodes.empty), List("IR_IMB_R"), "ALD") ) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractor.scala index b58fa475..a79086ea 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoCeSourceExtractor.scala @@ -3,10 +3,11 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.acts.McoCeActExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoceEmergenciesExtractor 
-import fr.polytechnique.cmap.cnam.etl.extractors.ngapacts.{McoCeFbstcNgapActExtractor, McoCeFcstcNgapActExtractor, NgapActConfig} -import fr.polytechnique.cmap.cnam.etl.extractors.prestations.{McoCeFbstcSpecialtyExtractor, McoCeFcstcSpecialtyExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.McoCeCcamActExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoceEmergenciesExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts.{McoCeFbstcNgapActExtractor, McoCeFcstcNgapActExtractor, NgapActClassConfig, NgapActConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.events.prestations.{McoCeFbstcSpecialtyExtractor, McoCeFcstcSpecialtyExtractor} class McoCeSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( path, @@ -14,27 +15,35 @@ class McoCeSourceExtractor(override val path: String, override val saveMode: Str ) { override val sourceName: String = "MCO_CE" override val extractors = List( - ExtractorSources[MedicalAct](McoCeActExtractor, List("MCO_CSTC", "MCO_FMSTC"), "MCO_CE_CCAM_ACTS"), - ExtractorSources[NgapAct]( + ExtractorSources[MedicalAct, SimpleExtractorCodes]( + McoCeCcamActExtractor(SimpleExtractorCodes.empty), + List("MCO_CSTC", "MCO_FMSTC"), + "MCO_CE_CCAM_ACTS" + ), + ExtractorSources[NgapAct, NgapActConfig[NgapActClassConfig]]( new McoCeFbstcNgapActExtractor(NgapActConfig(List.empty)), List("MCO_CSTC", "MCO_FBSTC"), "MCO_CE_FBSTC_NGAP_ACTS" ), - ExtractorSources[NgapAct]( + ExtractorSources[NgapAct, NgapActConfig[NgapActClassConfig]]( new McoCeFcstcNgapActExtractor(NgapActConfig(List.empty)), List("MCO_CSTC", "MCO_FCSTC"), "MCO_CE_FCSTC_NGAP_ACTS" ), - ExtractorSources[PractitionerClaimSpeciality]( - McoCeFbstcSpecialtyExtractor, + ExtractorSources[PractitionerClaimSpeciality, SimpleExtractorCodes]( + McoCeFbstcSpecialtyExtractor(SimpleExtractorCodes.empty), 
List("MCO_CSTC", "MCO_FBSTC"), "MCO_CE_FBSTC_PRACTITIONER_SPECIALITY" ), - ExtractorSources[PractitionerClaimSpeciality]( - McoCeFcstcSpecialtyExtractor, + ExtractorSources[PractitionerClaimSpeciality, SimpleExtractorCodes]( + McoCeFcstcSpecialtyExtractor(SimpleExtractorCodes.empty), List("MCO_CSTC", "MCO_FCSTC"), "MCO_CE_FCSTC_PRACTITIONER_SPECIALITY" ), - ExtractorSources[HospitalStay](McoceEmergenciesExtractor, List("MCO_CSTC", "MCO_FBSTC"), "MCO_CE_EMERGENCY_VISIT") + ExtractorSources[HospitalStay, SimpleExtractorCodes]( + McoceEmergenciesExtractor, + List("MCO_CSTC", "MCO_FBSTC"), + "MCO_CE_EMERGENCY_VISIT" + ) ) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractor.scala index 59f2a8f9..1694f927 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/McoSourceExtractor.scala @@ -3,9 +3,10 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.acts.McoCcamActExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses.{McoAssociatedDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.McoHospitalStaysExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.McoCcamActExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.{McoAssociatedDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor class McoSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( 
path, @@ -13,15 +14,31 @@ class McoSourceExtractor(override val path: String, override val saveMode: Strin ) { override val sourceName: String = "MCO" override val extractors = List( - ExtractorSources[MedicalAct](McoCcamActExtractor, List("MCO_C", "MCO_A"), "MCO_CCAM_ACT"), - ExtractorSources[Diagnosis](McoMainDiagnosisExtractor, List("MCO_C", "MCO_B"), "MCO_MAIN_DIAGNOSIS"), - ExtractorSources[Diagnosis]( - McoAssociatedDiagnosisExtractor, + ExtractorSources[MedicalAct, SimpleExtractorCodes]( + McoCcamActExtractor(SimpleExtractorCodes.empty), + List("MCO_C", "MCO_A"), + "MCO_CCAM_ACT" + ), + ExtractorSources[Diagnosis, SimpleExtractorCodes]( + McoMainDiagnosisExtractor(SimpleExtractorCodes.empty), + List("MCO_C", "MCO_B"), + "MCO_MAIN_DIAGNOSIS" + ), + ExtractorSources[Diagnosis, SimpleExtractorCodes]( + McoAssociatedDiagnosisExtractor(SimpleExtractorCodes.empty), List("MCO_C", "MCO_B", "MCO_D"), "MCO_ASSOCIATED_DIAGNOSIS" ), - ExtractorSources[Diagnosis](McoLinkedDiagnosisExtractor, List("MCO_C", "MCO_B"), "MCO_LINKED_DIAGNOSIS"), - ExtractorSources[HospitalStay](McoHospitalStaysExtractor, List("MCO_C", "MCO_B"), "MCO_HOSPITAL_STAY") + ExtractorSources[Diagnosis, SimpleExtractorCodes]( + McoLinkedDiagnosisExtractor(SimpleExtractorCodes.empty), + List("MCO_C", "MCO_B"), + "MCO_LINKED_DIAGNOSIS" + ), + ExtractorSources[HospitalStay, SimpleExtractorCodes]( + McoHospitalStaysExtractor, + List("MCO_C", "MCO_B"), + "MCO_HOSPITAL_STAY" + ) ) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractor.scala index 4181f22a..2c5beaa9 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractor.scala @@ -8,6 +8,7 @@ import org.apache.log4j.Logger import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events._ 
import fr.polytechnique.cmap.cnam.etl.extractors.Extractor +import fr.polytechnique.cmap.cnam.etl.extractors.codes.ExtractorCodes import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.Path import fr.polytechnique.cmap.cnam.util.reporting.{OperationMetadata, OperationReporter, OperationTypes} @@ -31,7 +32,8 @@ abstract class SourceExtractor(val path: String, val saveMode: String) { // Second, TypeTag is needed for the Spark encoder for case class, hence the explicit typing instead of AnyEvent. // @TODO: Every time you add a new Event type you will need to add it in the "with" clause val extractors: List[ExtractorSources[_ >: MedicalAct with HospitalStay with Diagnosis with Drug - with MedicalTakeOverReason with NgapAct with PractitionerClaimSpeciality <: AnyEvent with EventBuilder]] + with MedicalTakeOverReason with NgapAct with PractitionerClaimSpeciality <: AnyEvent with EventBuilder, + ExtractorCodes]] private val logger = Logger.getLogger(this.getClass) /** @@ -42,7 +44,7 @@ abstract class SourceExtractor(val path: String, val saveMode: String) { */ def extract(sources: Sources): List[OperationMetadata] = extractors.flatMap(es => runAndReport(sources)(es)) - def runAndReport[A <: AnyEvent : TypeTag](sources: Sources)(es: ExtractorSources[A]): Option[OperationMetadata] = + def runAndReport[A <: AnyEvent : TypeTag](sources: Sources)(es: ExtractorSources[A, ExtractorCodes]): Option[OperationMetadata] = run(es.extractor, sources) match { case Success(tde) => Some(report(es, tde)) case Failure(error) => { @@ -54,14 +56,14 @@ abstract class SourceExtractor(val path: String, val saveMode: String) { } } - def run[A <: AnyEvent : TypeTag](extractor: Extractor[A], sources: Sources): Try[Dataset[Event[A]]] = { + def run[A <: AnyEvent : TypeTag](extractor: Extractor[A, ExtractorCodes], sources: Sources): Try[Dataset[Event[A]]] = { Try { - extractor.extract(sources, Set.empty)(typeTag[A]) + extractor.extract(sources)(typeTag[A]) } } def 
report[A <: AnyEvent : TypeTag]( - extractorSources: ExtractorSources[A], + extractorSources: ExtractorSources[A, ExtractorCodes], result: Dataset[Event[A]]): OperationMetadata = OperationReporter .report( extractorSources.name, @@ -74,7 +76,7 @@ abstract class SourceExtractor(val path: String, val saveMode: String) { } -case class ExtractorSources[EventType <: AnyEvent : TypeTag]( - extractor: Extractor[EventType], +case class ExtractorSources[EventType <: AnyEvent : TypeTag, +Codes <: ExtractorCodes]( + extractor: Extractor[EventType, Codes], sources: List[String], name: String) \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractor.scala index 8da68052..91bd1f50 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractor.scala @@ -3,7 +3,8 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.events.MedicalAct -import fr.polytechnique.cmap.cnam.etl.extractors.acts.SsrCeActExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.SsrCeActExtractor class SsrCeSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( path, @@ -11,6 +12,10 @@ class SsrCeSourceExtractor(override val path: String, override val saveMode: Str ) { override val sourceName: String = "SSR_CE" override val extractors = List( - ExtractorSources[MedicalAct](SsrCeActExtractor, List("SSR_CSTC", "SSR_FMSTC"), "SSR_CE_CCAM") + ExtractorSources[MedicalAct, SimpleExtractorCodes]( + SsrCeActExtractor(SimpleExtractorCodes.empty), + List("SSR_CSTC", "SSR_FMSTC"), + "SSR_CE_CCAM" + ) ) } diff --git 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractor.scala index dfbe8777..d3d2b613 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrSourceExtractor.scala @@ -3,9 +3,10 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, HospitalStay, MedicalAct} -import fr.polytechnique.cmap.cnam.etl.extractors.acts.{SsrCcamActExtractor, SsrCsarrActExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.diagnoses._ -import fr.polytechnique.cmap.cnam.etl.extractors.hospitalstays.SsrHospitalStaysExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.{SsrCcamActExtractor, SsrCsarrActExtractor} +import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses._ +import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.SsrHospitalStaysExtractor class SsrSourceExtractor(override val path: String, override val saveMode: String) extends SourceExtractor( path, @@ -13,12 +14,36 @@ class SsrSourceExtractor(override val path: String, override val saveMode: Strin ) { override val sourceName: String = "SSR" override val extractors = List( - ExtractorSources[MedicalAct](SsrCcamActExtractor, List("SSR_C", "SSR_CCAM"), "SSR_CCAM"), - ExtractorSources[MedicalAct](SsrCsarrActExtractor, List("SSR_C", "SSR_CSARR"), "SSR_CSARR"), - ExtractorSources[Diagnosis](SsrMainDiagnosisExtractor, List("SSR_C", "SSR_B"), "SSR_MAIN_DIAGNOSIS"), - ExtractorSources[Diagnosis](SsrLinkedDiagnosisExtractor, List("SSR_C", "SSR_B"), "SSR_LINKED_DIAGNOSIS"), - ExtractorSources[Diagnosis](SsrAssociatedDiagnosisExtractor, List("SSR_C", "SSR_D"), "SSR_ASSOCIATED_DIAGNOSIS"), - 
ExtractorSources[Diagnosis](SsrTakingOverPurposeExtractor, List("SSR_C", "SSR_B"), "SSR_TAKE_OVER_REASON"), - ExtractorSources[HospitalStay](SsrHospitalStaysExtractor, List("SSR_C", "SSR_B"), "SSR_STAY") + ExtractorSources[MedicalAct, SimpleExtractorCodes]( + SsrCcamActExtractor(SimpleExtractorCodes.empty), + List("SSR_C", "SSR_CCAM"), + "SSR_CCAM" + ), + ExtractorSources[MedicalAct, SimpleExtractorCodes]( + SsrCsarrActExtractor(SimpleExtractorCodes.empty), + List("SSR_C", "SSR_CSARR"), + "SSR_CSARR" + ), + ExtractorSources[Diagnosis, SimpleExtractorCodes]( + SsrMainDiagnosisExtractor(SimpleExtractorCodes.empty), + List("SSR_C", "SSR_B"), + "SSR_MAIN_DIAGNOSIS" + ), + ExtractorSources[Diagnosis, SimpleExtractorCodes]( + SsrLinkedDiagnosisExtractor(SimpleExtractorCodes.empty), + List("SSR_C", "SSR_B"), + "SSR_LINKED_DIAGNOSIS" + ), + ExtractorSources[Diagnosis, SimpleExtractorCodes]( + SsrAssociatedDiagnosisExtractor(SimpleExtractorCodes.empty), + List("SSR_C", "SSR_D"), + "SSR_ASSOCIATED_DIAGNOSIS" + ), + ExtractorSources[Diagnosis, SimpleExtractorCodes]( + SsrTakingOverPurposeExtractor(SimpleExtractorCodes.empty), + List("SSR_C", "SSR_B"), + "SSR_TAKE_OVER_REASON" + ), + ExtractorSources[HospitalStay, SimpleExtractorCodes](SsrHospitalStaysExtractor, List("SSR_C", "SSR_B"), "SSR_STAY") ) } \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCEMedicalActsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCEMedicalActsSuite.scala index 8c363768..483b54f1 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCEMedicalActsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/McoCEMedicalActsSuite.scala @@ -20,7 +20,7 @@ class McoCEMedicalActsSuite extends SharedContext { val schema = StructType( StructField(colNames.PatientID, StringType) :: StructField(colNames.CamCode, StringType) :: - 
StructField(colNames.Date, StringType) :: Nil + StructField(colNames.StartDate, StringType) :: Nil ) val data = Array[Any]("George", "coloscopie", "23012010") val input = new GenericRowWithSchema(data, schema) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractorSuite.scala index f59a86a8..9f81900f 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/DcirSourceExtractorSuite.scala @@ -4,8 +4,8 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import org.apache.spark.sql.DataFrame import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.DrugConfig -import fr.polytechnique.cmap.cnam.etl.extractors.drugs.level.Cip13Level +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.DrugConfig +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.Cip13Level import fr.polytechnique.cmap.cnam.etl.sources.Sources class DcirSourceExtractorSuite extends SharedContext { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractorSuite.scala index fba06d5d..80ff9aa9 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SourceExtractorSuite.scala @@ -8,6 +8,7 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Event, McoCIM10Act, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.{ExtractorCodes, SimpleExtractorCodes} 
import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.makeTS import fr.polytechnique.cmap.cnam.util.reporting.{OperationMetadata, OperationTypes} @@ -19,9 +20,10 @@ class SourceExtractorSuite extends SharedContext { // This shouldn't be replicated anywhere and Mocking should be the preferred technique. // Mocking the Extractor is not possible because of type erasure. Type erasure make the typing of the implicit // in the extractor method of trait Extractor as the type is not known at compile time, but only at run time. - val testExtractor = new Extractor[MedicalAct] with Serializable { - override def isInStudy(codes: Set[String]) - (row: Row): Boolean = true + val testExtractor = new Extractor[MedicalAct, SimpleExtractorCodes] { + override def getCodes: SimpleExtractorCodes = SimpleExtractorCodes.empty + + override def isInStudy(row: Row): Boolean = true override def isInExtractorScope(row: Row): Boolean = true @@ -38,8 +40,7 @@ class SourceExtractorSuite extends SharedContext { } override def extract( - sources: Sources, - codes: Set[String]) + sources: Sources) (implicit ctag: universe.TypeTag[MedicalAct]): Dataset[Event[MedicalAct]] = { import sqlCtx.implicits._ Seq[Event[MedicalAct]]( @@ -75,8 +76,8 @@ class SourceExtractorSuite extends SharedContext { // When val se: SourceExtractor = new SourceExtractor(path, "overwrite") { override val sourceName: String = "Test" - override val extractors: List[ExtractorSources[MedicalAct]] = - List(ExtractorSources[MedicalAct](testExtractor, List("Mock"), "Mock")) + override val extractors: List[ExtractorSources[MedicalAct, ExtractorCodes]] = + List(ExtractorSources[MedicalAct, SimpleExtractorCodes](testExtractor, List("Mock"), "Mock")) } val result = se.extract(sources) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractorSuite.scala 
index 0cf52a56..df3c8970 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractorSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/SsrCeSourceExtractorSuite.scala @@ -4,14 +4,14 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.extractors.acts.SsrCeActExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.sources.ssrce.SsrCeSource import fr.polytechnique.cmap.cnam.util.functions.makeTS class SsrCeSourceExtractorSuite extends SharedContext { "extract" should "extract available Events and warns when it fails if the tables have not been flattened" in { val sqlCtx = sqlContext import sqlCtx.implicits._ - import SsrCeActExtractor.ColNames + val colNames = new SsrCeSource {}.ColNames // Given val ssrCe = Seq( ("Patient_A", "AAAA", makeTS(2010, 1, 1)), @@ -20,7 +20,7 @@ class SsrCeSourceExtractorSuite extends SharedContext { ("Patient_B", "CCCC", makeTS(2010, 4, 1)), ("Patient_C", "BBBB", makeTS(2010, 5, 1)) ).toDF( - ColNames.PatientID, ColNames.CamCode, ColNames.Date + colNames.PatientID, colNames.CamCode, colNames.StartDate ) val source = new Sources(ssrCe = Some(ssrCe)) val path = "target/test/output" From 685862d653a54b3daf8bb57bdcb478f345aad385 Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Thu, 9 Apr 2020 14:27:58 +0200 Subject: [PATCH 32/38] CNAM-457: MinimumDuration parameter for Interaction Transformer. 
--- src/main/resources/config/fall/default.conf | 3 +- src/main/resources/config/fall/template.conf | 2 + .../transformers/interaction/ExposureN.scala | 7 ++++ .../InteractionTransformerConfig.scala | 11 ++++- .../NLevelInteractionTransformer.scala | 4 +- .../cnam/study/fall/config/FallConfig.scala | 5 ++- .../NLevelInteractionTransformerSuite.scala | 41 ++++++++++++------- .../study/fall/config/FallConfigSuite.scala | 11 +++-- 8 files changed, 60 insertions(+), 24 deletions(-) diff --git a/src/main/resources/config/fall/default.conf b/src/main/resources/config/fall/default.conf index 1cc02a53..ad576c14 100644 --- a/src/main/resources/config/fall/default.conf +++ b/src/main/resources/config/fall/default.conf @@ -11,8 +11,9 @@ root { to_exposure_strategy = "purchase_count_based" } } - interaction { + interactions { level: 2 + minimum_duration: 30 days } drugs { level: "Therapeutic" diff --git a/src/main/resources/config/fall/template.conf b/src/main/resources/config/fall/template.conf index 2b69453c..463924a6 100644 --- a/src/main/resources/config/fall/template.conf +++ b/src/main/resources/config/fall/template.conf @@ -16,8 +16,10 @@ # exposures.end_threshold_gc: 90 days // If periodStrategy="limited", represents the period without purchases for an exposure to be considered "finished". # exposures.end_threshold_ngc: 30 days // If periodStrategy="limited", represents the period without purchases for an exposure to be considered "finished". # exposures.end_delay: 30 days // Number of periods that we add to the exposure end to delay it (lag). +# exposures.to_exposure_strategy: purchase_count_based // possible values "purchase_count_based" or "latest_purchase_based" # interactions.level: 3 // Integer representing the maximum number of values of Interaction.
Please be careful as this not scale well beyond 5 when the data contains a patient with very high number of exposures +# interactions.minimum_duration: 30 days // If Interaction duration is less than this value, it is not considered. Proxy for medication change. # drugs.level: "Therapeutic" // Options are Therapeutic, Pharmacological, MoleculeCombination # drugs.families: ["Antihypertenseurs", "Antidepresseurs", "Neuroleptiques", "Hypnotiques"] diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/ExposureN.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/ExposureN.scala index 7c71151a..7ea1c343 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/ExposureN.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/ExposureN.scala @@ -3,12 +3,19 @@ package fr.polytechnique.cmap.cnam.etl.transformers.interaction import cats.syntax.functor._ +import me.danielpes.spark.datetime.{Period => Duration} import fr.polytechnique.cmap.cnam.etl.datatypes.{NullRemainingPeriod, Period, Subtractable, RemainingPeriod} import fr.polytechnique.cmap.cnam.etl.events.{Event, Interaction} case class ExposureN(patientID: String, values: Set[String], period: Period) extends Subtractable[ExposureN] { self => + /** + * Returns duration of this ExposureN in milliseconds + * @return duration in millisecond as Long + */ + def toDuration: Long = self.period.end.getTime - self.period.start.getTime + def intersect(other: ExposureN): Option[ExposureN] = { if (self.patientID.equals(other.patientID) && self.values.intersect(other.values).isEmpty) { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/InteractionTransformerConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/InteractionTransformerConfig.scala index 9615cbf3..af83a417 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/InteractionTransformerConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/InteractionTransformerConfig.scala @@ -2,10 +2,17 @@ package fr.polytechnique.cmap.cnam.etl.transformers.interaction +import me.danielpes.spark.datetime.{Period => Duration} +import me.danielpes.spark.datetime.implicits._ import fr.polytechnique.cmap.cnam.etl.transformers.TransformerConfig -class InteractionTransformerConfig(val level: Int) extends TransformerConfig +class InteractionTransformerConfig(val level: Int, val minimumDuration: Duration) extends TransformerConfig object InteractionTransformerConfig { - def apply(level: Int = 3): InteractionTransformerConfig = new InteractionTransformerConfig(level) + def apply( + level: Int = 3, + minimumDuration: Duration = 30.days): InteractionTransformerConfig = new InteractionTransformerConfig( + level, + minimumDuration + ) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/NLevelInteractionTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/NLevelInteractionTransformer.scala index 1ffff493..7b495d5a 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/NLevelInteractionTransformer.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/NLevelInteractionTransformer.scala @@ -2,7 +2,7 @@ package fr.polytechnique.cmap.cnam.etl.transformers.interaction -import org.apache.spark.sql.{Dataset, functions} +import org.apache.spark.sql.{functions, Dataset} import fr.polytechnique.cmap.cnam.etl.datatypes._ import fr.polytechnique.cmap.cnam.etl.events.{Event, Exposure, Interaction} import fr.polytechnique.cmap.cnam.util.functions._ @@ -13,12 +13,14 @@ case class NLevelInteractionTransformer(config: InteractionTransformerConfig) ex def joinTwoExposureNDataSet(right: Dataset[ExposureN], left: Dataset[ExposureN]): 
Dataset[ExposureN] = { val sqlCtx = right.sqlContext import sqlCtx.implicits._ + val minimumDuration = config.minimumDuration.totalMilliseconds right .joinWith( left, left(Event.Columns.PatientID) === right(Event.Columns.PatientID) && !left("values").geq(right("values")) ) .flatMap(e => e._1.intersect(e._2)) + .filter(i => i.toDuration >= minimumDuration) .repartition(functions.col("patientID"), functions.col("values")) .cache() } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala index b4e15d68..e9f8e297 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala @@ -89,8 +89,9 @@ object FallConfig extends FallConfigLoader with FractureCodes { /** Parameters needed for the Interaction Transformer **/ case class InteractionConfig( - override val level: Int = 2 - ) extends InteractionTransformerConfig(level = level) + override val level: Int = 2, + override val minimumDuration: Period = 30.days + ) extends InteractionTransformerConfig(level = level, minimumDuration) /** Parameters needed for the diagnosesConfig **/ case class SitesConfig(sites: List[BodySite] = List(BodySites)) { diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/NLevelInteractionTransformerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/NLevelInteractionTransformerSuite.scala index 9d9ee9ad..44d87d96 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/NLevelInteractionTransformerSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/NLevelInteractionTransformerSuite.scala @@ -2,6 +2,7 @@ package fr.polytechnique.cmap.cnam.etl.transformers.interaction +import me.danielpes.spark.datetime.implicits._ import org.apache.spark.sql.Dataset 
import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.datatypes.Period @@ -31,8 +32,7 @@ class NLevelInteractionTransformerSuite extends SharedContext { ExposureN("Federer", Set("Dopamine", "Diazepam"), Period(makeTS(2019, 4, 1), makeTS(2019, 5, 1))), ExposureN("Federer", Set("Paracetamol", "Dopamine"), Period(makeTS(2019, 3, 1), makeTS(2019, 5, 1))), ExposureN("Federer", Set("Paracetamol", "Dopamine"), Period(makeTS(2019, 7, 1), makeTS(2019, 8, 1))), - ExposureN("Federer", Set("Paracetamol", "Diazepam"), Period(makeTS(2019, 4, 1), makeTS(2019, 6, 1))), - ExposureN("Federer", Set("Alprazolam", "Dopamine"), Period(makeTS(2019, 2, 1), makeTS(2019, 3, 1))) + ExposureN("Federer", Set("Paracetamol", "Diazepam"), Period(makeTS(2019, 4, 1), makeTS(2019, 6, 1))) ).toDS(), Seq[ExposureN]( ExposureN("Federer", Set("Paracetamol"), Period(makeTS(2019, 3, 1), makeTS(2019, 8, 1))), @@ -43,9 +43,10 @@ class NLevelInteractionTransformerSuite extends SharedContext { ).toDS() ) - val result = NLevelInteractionTransformer(InteractionTransformerConfig(3)).elevateToExposureN(exposures, 3) + val result = NLevelInteractionTransformer(InteractionTransformerConfig(3, 30.days)).elevateToExposureN(exposures, 3) // The mapping is necessary for now as Spark seems to struggle with nested Data Structures - result.zip(expected).foreach(e => assertDSs(e._1.map(_.toInteraction).distinct(), e._2.distinct().map(_.toInteraction).distinct())) + result.zip(expected) + .foreach(e => assertDSs(e._1.map(_.toInteraction).distinct(), e._2.distinct().map(_.toInteraction).distinct())) } @@ -101,7 +102,8 @@ class NLevelInteractionTransformerSuite extends SharedContext { val result = NLevelInteractionTransformer(InteractionTransformerConfig(3)).trickleDownExposureN(input) // The mapping is necessary for now as Spark seems to struggle with nested Data Structures - result.zip(expected).foreach(e => assertDSs(e._1.map(_.toInteraction).distinct(), 
e._2.distinct().map(_.toInteraction).distinct())) + result.zip(expected) + .foreach(e => assertDSs(e._1.map(_.toInteraction).distinct(), e._2.distinct().map(_.toInteraction).distinct())) } "reduceHigherExposuresNFromLowerExposures" should "reduce the time period of higher ExposureN from lower ExposureN" in { @@ -158,8 +160,17 @@ class NLevelInteractionTransformerSuite extends SharedContext { ).toDS() ) - val result = NLevelInteractionTransformer(InteractionTransformerConfig(3)).reduceHigherExposuresNFromLowerExposures(interactions, higherInteractionInvolvement) - result.zip(expected).foreach(e => assertDSs(e._1.map(_.e.toInteraction).distinct(), e._2.distinct().map(_.toInteraction).distinct())) + val result = NLevelInteractionTransformer(InteractionTransformerConfig(3)).reduceHigherExposuresNFromLowerExposures( + interactions, + higherInteractionInvolvement + ) + result.zip(expected) + .foreach( + e => assertDSs( + e._1.map(_.e.toInteraction).distinct(), + e._2.distinct().map(_.toInteraction).distinct() + ) + ) } "transform" should "create interactions of level N" in { @@ -176,14 +187,14 @@ class NLevelInteractionTransformerSuite extends SharedContext { ).toDS() val expected: Dataset[Event[Interaction]] = Seq[ExposureN]( - ExposureN("Federer", Set("Paracetamol", "Dopamine", "Diazepam"), Period(makeTS(2019, 4, 1), makeTS(2019, 5, 1))), - ExposureN("Federer", Set("Paracetamol", "Dopamine"), Period(makeTS(2019, 3, 1), makeTS(2019, 4, 1))), - ExposureN("Federer", Set("Paracetamol", "Dopamine"), Period(makeTS(2019, 7, 1), makeTS(2019, 8, 1))), - ExposureN("Federer", Set("Paracetamol", "Diazepam"), Period(makeTS(2019, 5, 1), makeTS(2019, 6, 1))), - ExposureN("Federer", Set("Alprazolam", "Dopamine"), Period(makeTS(2019, 2, 1), makeTS(2019, 3, 1))), - ExposureN("Federer", Set("Paracetamol"), Period(makeTS(2019, 6, 1), makeTS(2019, 7, 1))), - ExposureN("Federer", Set("Alprazolam"), Period(makeTS(2019, 1, 1), makeTS(2019, 2, 1))) - ).toDS.map[Event[Interaction]]((e: 
ExposureN) => e.toInteraction) + ExposureN("Federer", Set("Paracetamol", "Dopamine", "Diazepam"), Period(makeTS(2019, 4, 1), makeTS(2019, 5, 1))), + ExposureN("Federer", Set("Paracetamol", "Dopamine"), Period(makeTS(2019, 3, 1), makeTS(2019, 4, 1))), + ExposureN("Federer", Set("Paracetamol", "Dopamine"), Period(makeTS(2019, 7, 1), makeTS(2019, 8, 1))), + ExposureN("Federer", Set("Paracetamol", "Diazepam"), Period(makeTS(2019, 5, 1), makeTS(2019, 6, 1))), + ExposureN("Federer", Set("Dopamine"), Period(makeTS(2019, 2, 1), makeTS(2019, 3, 1))), + ExposureN("Federer", Set("Paracetamol"), Period(makeTS(2019, 6, 1), makeTS(2019, 7, 1))), + ExposureN("Federer", Set("Alprazolam"), Period(makeTS(2019, 1, 1), makeTS(2019, 3, 1))) + ).toDS.map[Event[Interaction]]((e: ExposureN) => e.toInteraction) val result = NLevelInteractionTransformer(InteractionTransformerConfig(6)).transform(exposures) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala index 19979cc5..02930d57 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala @@ -65,11 +65,12 @@ class FallConfigSuite extends AnyFlatSpec { | to_exposure_strategy = "latest_purchase_based" | } | } - | interaction { + | interactions { | level: 5 + | minimum_duration: 50 days | } | patients { - | start_gap_in_months: 2 + | start_gap_in_months: 2 | } | drugs { | level: "Pharmacological" @@ -97,7 +98,11 @@ class FallConfigSuite extends AnyFlatSpec { endThresholdGc = 900.days, toExposureStrategy = LatestPurchaseBased ) - ), drugs = defaultConf.drugs.copy( + ), interactions = defaultConf.interactions.copy( + 5, + 50.days + ), + drugs = defaultConf.drugs.copy( level = PharmacologicalLevel ), runParameters = defaultConf.runParameters.copy(exposure = List("Patients", "DrugPurchases", 
"Exposures")) ) From 96ef5916d1a676d63bcace7adda2e3c1e480f5fc Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Fri, 10 Apr 2020 16:03:53 +0200 Subject: [PATCH 33/38] CNAM-458: Correct bug for Hospitalised fractures. The bug deletes all the Diagnoses events of patient, if at some point he recorded a CCAM act belonging to CCAM exceptions list. The expected behaviour is to only delete the diagnosis that has the same hospital stay as the act that has the CCAM exception code. --- .../fall/fractures/FracturesTransformer.scala | 6 +- .../fractures/HospitalizedFractures.scala | 118 +++++++++------- .../HospitalizedFracturesSuite.scala | 127 +++++++----------- 3 files changed, 128 insertions(+), 123 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformer.scala index 082b7cc8..b9aebb1e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformer.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformer.scala @@ -25,7 +25,11 @@ class FracturesTransformer(config: FallConfig) extends OutcomesTransformer with diagnoses: Dataset[Event[Diagnosis]]): Dataset[Event[Outcome]] = { // Hospitalized fractures - val hospitalizedFractures = HospitalizedFractures.transform(diagnoses, acts, config.sites.sites) + val hospitalizedFractures = HospitalizedFractures.transform( + diagnoses, + acts.filter(_.category == McoCCAMAct.category), + config.sites.sites + ) // Liberal Fractures val liberalFractures = LiberalFractures.transform(liberalActs) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFractures.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFractures.scala index 32f0fa61..ec8af670 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFractures.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFractures.scala @@ -2,8 +2,8 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures -import org.apache.spark.sql.functions._ import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.functions._ import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.transformers.outcomes.OutcomesTransformer import fr.polytechnique.cmap.cnam.study.fall.codes.FractureCodes @@ -13,39 +13,12 @@ import fr.polytechnique.cmap.cnam.study.fall.codes.FractureCodes * https://datainitiative.atlassian.net/wiki/spaces/CFC/pages/61282101/General+fractures+Fall+study */ -case class HospitalStayID(patientID: String, id: String) +case class HospitalStayID(patientID: String, groupID: String) object HospitalizedFractures extends OutcomesTransformer with FractureCodes { override val outcomeName: String = "hospitalized_fall" - def transform( - diagnoses: Dataset[Event[Diagnosis]], - acts: Dataset[Event[MedicalAct]], ghmSites: List[BodySite]): Dataset[Event[Outcome]] = { - - import diagnoses.sqlContext.implicits._ - val ghmCodes = BodySite.extractCIM10CodesFromSites(ghmSites) - val correctCIM10Event = diagnoses - .filter(diagnosis => isFractureDiagnosis(diagnosis, ghmCodes)) - - val incorrectGHMStays = acts - .filter(isBadGHM _) - .map(event => HospitalStayID(event.patientID, event.groupID)) - .distinct() - - filterHospitalStay(correctCIM10Event, incorrectGHMStays) - .map( - event => Outcome( - event.patientID, - BodySite.getSiteFromCode(event.value, ghmSites, CodeType.CIM10), - outcomeName, - event.weight, - event.start - ) - ) - - } - def isFractureDiagnosis(event: Event[Diagnosis], ghmSites: List[String]): Boolean = { isInCodeList(event, ghmSites.toSet) } @@ -59,17 +32,16 @@ object HospitalizedFractures extends OutcomesTransformer with FractureCodes { } /** - * filters diagnosis that do not have a DP in the same hospital stay - * and the diagnosis that relates to an 
incorrectGHMStay + * Filter out Diagnoses which do not have a MainDiagnosis during the same HospitalStay that is Fracture Diagnosis. + * + * @param diagnoses Fracture Diagnoses with DP, DA and DR diagnoses. + * @return Diagnoses with a DP in the same hospital stay that is a fracture diagnosis. */ - def filterHospitalStay( - events: Dataset[Event[Diagnosis]], - incorrectGHMStays: Dataset[HospitalStayID]) - : Dataset[Event[Diagnosis]] = { - - val spark: SparkSession = events.sparkSession - import spark.implicits._ - val fracturesDiagnoses = events + def filterDiagnosesWithoutDP(diagnoses: Dataset[Event[Diagnosis]]): Dataset[Event[Diagnosis]] = { + + import diagnoses.sparkSession.implicits._ + + diagnoses .groupByKey(_.groupID) .flatMapGroups { case (_, diagnoses) => val diagnosisStream = diagnoses.toStream @@ -78,17 +50,71 @@ object HospitalizedFractures extends OutcomesTransformer with FractureCodes { } else { Seq.empty } - }.toDF() + } + } + /** + * Get the ID of Hospital Stays that are mainly for fracture followup such as plaster removal. + * + * @param acts Contains all CCAM codes Events from different sources. + * @return Hospital Stays id for fracture followup. + */ + def getFractureFollowUpStays(acts: Dataset[Event[MedicalAct]]): Dataset[HospitalStayID] = { + import acts.sparkSession.implicits._ - val patientsToFilter = incorrectGHMStays.select("patientID") - fracturesDiagnoses - .join(broadcast(patientsToFilter), Seq("patientID"), "left_anti") - .as[Event[Diagnosis]] + acts + .filter(_.category == McoCCAMAct.category) + .filter(isBadGHM _) + .map(event => HospitalStayID(event.patientID, event.groupID)) + .distinct() } - def isMainOrDASDiagnosis(event: Event[Diagnosis]): Boolean = { - event.category == McoMainDiagnosis.category || event.category == McoAssociatedDiagnosis.category + /** + * Filter out Diagnosis who share a groupID in the followUpStaysForFractures. + * + * @param fracturesDiagnoses Dataset of fracture diagnosis. 
+ * @param followUpStaysForFractures Dataset of hospital stays for followup of fractures. + * @return + */ + def filterDiagnosisForFracturesFollowUp( + fracturesDiagnoses: Dataset[Event[Diagnosis]], + followUpStaysForFractures: Dataset[HospitalStayID] + ): Dataset[Event[Diagnosis]] = { + import fracturesDiagnoses.sparkSession.implicits._ + fracturesDiagnoses + .joinWith( + broadcast(followUpStaysForFractures), + fracturesDiagnoses(Event.Columns.PatientID) === followUpStaysForFractures("patientID") + && fracturesDiagnoses(Event.Columns.GroupID) === followUpStaysForFractures("groupID"), + "left_outer" + ) + .filter(_._2 == null) + .map(_._1) } + def transform( + diagnoses: Dataset[Event[Diagnosis]], + acts: Dataset[Event[MedicalAct]], + ghmSites: List[BodySite] + ): Dataset[Event[Outcome]] = { + + import diagnoses.sqlContext.implicits._ + val ghmCodes = BodySite.extractCIM10CodesFromSites(ghmSites) + val diagnosisWithDP = diagnoses.filter(diagnosis => isFractureDiagnosis(diagnosis, ghmCodes)) + .transform(filterDiagnosesWithoutDP) + val fractureFollowUpHospitalStays = getFractureFollowUpStays(acts) + + + filterDiagnosisForFracturesFollowUp(diagnosisWithDP, fractureFollowUpHospitalStays) + .map( + event => Outcome( + event.patientID, + BodySite.getSiteFromCode(event.value, ghmSites, CodeType.CIM10), + outcomeName, + event.weight, + event.start + ) + ) + + } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFracturesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFracturesSuite.scala index 7fa07622..bb4995dc 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFracturesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFracturesSuite.scala @@ -2,6 +2,7 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext import 
fr.polytechnique.cmap.cnam.etl.events.{Outcome, _} import fr.polytechnique.cmap.cnam.util.functions._ @@ -9,112 +10,84 @@ import fr.polytechnique.cmap.cnam.util.functions._ class HospitalizedFracturesSuite extends SharedContext { - "isInCodeList" should "return yes if there is a code with the right start" in { - // Given - val event = McoMainDiagnosis("Pierre", "3", "jambe cassée", makeTS(2017, 7, 18)) - val codes = Set("jam", "bon", "de", "bayonne") - - // When - val result = HospitalizedFractures.isInCodeList(event, codes) - - // Then - assert(result) - } - - it should "return yes if there is an exact same code" in { - // Given - val event = McoMainDiagnosis("Pierre", "3", "jambe cassée", makeTS(2017, 7, 18)) - val codes = Set("jambe cassée", "bon", "de", "bayonne") - - // When - val result = HospitalizedFractures.isInCodeList(event, codes) - - // Then - assert(result) - } + "filterDiagnosesWithoutDP" should + "filter out LinkedDiagnosis and AssociatedDiagnoses that has not a MainDiagnosis" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ - it should "return no if there is no correct code" in { // Given - val event = McoMainDiagnosis("Pierre", "3", "jambe cassée", makeTS(2017, 7, 18)) - val codes = Set("avada kedavra", "bon", "de", "bayonne") - - // When - val result = HospitalizedFractures.isInCodeList(event, codes) - - // Then - assert(!result) - } + val diagnoses: Dataset[Event[Diagnosis]] = Seq( + McoMainDiagnosis("Pierre", "3", "S02.42", makeTS(2017, 7, 18)), + McoMainDiagnosis("Paul", "1", "S42.54678", makeTS(2017, 7, 20)), + McoMainDiagnosis("Paul", "7", "S02.42", makeTS(2017, 1, 2)), + McoAssociatedDiagnosis("Jacques", "8", "qu'est-ce-que tu fais là?", makeTS(2017, 7, 18)), + McoLinkedDiagnosis("Jacques", "8", "qu'est-ce-que tu fais là?", makeTS(2017, 7, 18)), + McoAssociatedDiagnosis("Paul", "7", "S02.42", makeTS(2017, 1, 2)), + McoLinkedDiagnosis("Pierre", "3", "S02.42", makeTS(2017, 7, 18)) + ).toDS - "isFractureDiagnosis" should "return yes 
for correct CIM10 code" in { - // Given - val event = McoMainDiagnosis("Pierre", "3", "S02.35", makeTS(2017, 7, 18)) + val expected: Dataset[Event[Diagnosis]] = Seq( + McoMainDiagnosis("Pierre", "3", "S02.42", makeTS(2017, 7, 18)), + McoMainDiagnosis("Paul", "1", "S42.54678", makeTS(2017, 7, 20)), + McoMainDiagnosis("Paul", "7", "S02.42", makeTS(2017, 1, 2)), + McoAssociatedDiagnosis("Paul", "7", "S02.42", makeTS(2017, 1, 2)), + McoLinkedDiagnosis("Pierre", "3", "S02.42", makeTS(2017, 7, 18)) + ).toDS // When - val result = HospitalizedFractures.isFractureDiagnosis(event, AllSites.codesCIM10) - + val result = HospitalizedFractures.filterDiagnosesWithoutDP(diagnoses) // Then - assert(result) + assertDSs(result, expected) } - "isMainDiagnosis" should "return yes for correct DP code" in { - // Given - val event = McoMainDiagnosis("Pierre", "3", "whatever", makeTS(2017, 7, 18)) - - // When - val result = HospitalizedFractures.isMainOrDASDiagnosis(event) - - // Then - assert(result) - } + "getFractureFollowUpStays" should "get the HospitalStay where the CCAM is in CCAMExceptions" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ - it should "return no for other code" in { // Given - val event = McoLinkedDiagnosis("Pierre", "3", "whatever", makeTS(2017, 7, 18)) - - // When - val result = HospitalizedFractures.isMainOrDASDiagnosis(event) - - // Then - assert(!result) - } + val medicalActs: Dataset[Event[MedicalAct]] = Seq( + McoCCAMAct("Paul", "1", "LJGA001", makeTS(2017, 7, 20)), + McoCCAMAct("Paul", "1", "Whatever", makeTS(2017, 12, 20)) + ).toDS - "isBadGHM" should "return yes for correct GHM code" in { - // Given - val event = McoCCAMAct("Pierre", "3", "LJGA001", makeTS(2017, 7, 18)) + val expected: Dataset[HospitalStayID] = Seq( + HospitalStayID("Paul", "1") + ).toDS // When - val result = HospitalizedFractures.isBadGHM(event) - + val result = HospitalizedFractures.getFractureFollowUpStays(medicalActs) // Then - assert(result) + assertDSs(result, expected) } 
- - "filterHospitalStay" should "return correct dataset" in { + "filterDiagnosisForFracturesFollowUp" should + "return Diagnosis which has not a fracture followup hospital stay" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input = List( + val input: Dataset[Event[Diagnosis]] = List( McoMainDiagnosis("Paul", "1", "hemorroides", makeTS(2017, 7, 20)), - McoMainDiagnosis("Pierre", "3", "jambe cassée", makeTS(2017, 7, 18)) + McoMainDiagnosis("Pierre", "3", "jambe cassée", makeTS(2017, 7, 18)), + McoMainDiagnosis("Pierre", "4", "jambe cassée", makeTS(2016, 7, 18)) ).toDS val badStays = Seq( HospitalStayID("Pierre", "3") ).toDS - val expected = List( - McoMainDiagnosis("Paul", "1", "hemorroides", makeTS(2017, 7, 20)) + val expected: Dataset[Event[Diagnosis]] = List( + McoMainDiagnosis("Paul", "1", "hemorroides", makeTS(2017, 7, 20)), + McoMainDiagnosis("Pierre", "4", "jambe cassée", makeTS(2016, 7, 18)) ).toDS // When - val result = HospitalizedFractures.filterHospitalStay(input, badStays) + val result = HospitalizedFractures.filterDiagnosisForFracturesFollowUp(input, badStays) // Then assertDSs(result, expected) } - "transform" should "return correct Outcome dataset" in { + "transform" should "return Fractures Event Dataset based on the algorithm" in { val sqlCtx = sqlContext import sqlCtx.implicits._ @@ -122,7 +95,7 @@ class HospitalizedFracturesSuite extends SharedContext { val diagnoses = Seq( McoMainDiagnosis("Pierre", "3", "S02.42", makeTS(2017, 7, 18)), McoMainDiagnosis("Paul", "1", "S42.54678", makeTS(2017, 7, 20)), - McoMainDiagnosis("Paul", "7", "hemorroides", makeTS(2017, 1, 2)), + McoMainDiagnosis("Paul", "7", "S02.42", makeTS(2017, 1, 2)), McoAssociatedDiagnosis("Jacques", "8", "qu'est-ce-que tu fais là?", makeTS(2017, 7, 18)) ).toDS @@ -131,7 +104,8 @@ class HospitalizedFracturesSuite extends SharedContext { ).toDS val expected = Seq( - Outcome("Pierre", "AllSites", "hospitalized_fall", makeTS(2017, 7, 18)) + Outcome("Pierre", 
"AllSites", "hospitalized_fall", makeTS(2017, 7, 18)), + Outcome("Paul", "AllSites", "hospitalized_fall", makeTS(2017, 1, 2)) ).toDS // When @@ -150,7 +124,7 @@ class HospitalizedFracturesSuite extends SharedContext { McoMainDiagnosis("Jean", "2", "S02.42", 3.0, makeTS(2017, 7, 18)), McoMainDiagnosis("Kevin", "4", "S02.42", 4.0, makeTS(2017, 7, 18)), McoMainDiagnosis("Paul", "1", "S42.54678", 2.0, makeTS(2017, 7, 20)), - McoMainDiagnosis("Paul", "7", "hemorroides", 2.0, makeTS(2017, 1, 2)), + McoMainDiagnosis("Paul", "7", "S42.54678", 2.0, makeTS(2017, 1, 2)), McoAssociatedDiagnosis("Jacques", "8", "qu'est-ce-que tu fais là?", 2.0, makeTS(2017, 7, 18)) ).toDS @@ -161,7 +135,8 @@ class HospitalizedFracturesSuite extends SharedContext { val expected = Seq( Outcome("Pierre", "AllSites", "hospitalized_fall", 2.0, makeTS(2017, 7, 18)), Outcome("Jean", "AllSites", "hospitalized_fall", 3.0, makeTS(2017, 7, 18)), - Outcome("Kevin", "AllSites", "hospitalized_fall", 4.0, makeTS(2017, 7, 18)) + Outcome("Kevin", "AllSites", "hospitalized_fall", 4.0, makeTS(2017, 7, 18)), + Outcome("Paul", "AllSites", "hospitalized_fall", 2.0, makeTS(2017, 1, 2)) ).toDS // When From 7d10b9cdbb66ed84f68092dfad00259773f02136 Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Tue, 14 Apr 2020 14:51:19 +0200 Subject: [PATCH 34/38] CNAM-458: Add missing Columns. CNAM-458: Update severity algorithm. 
--- src/main/resources/config/fall/default.conf | 2 +- .../cmap/cnam/study/fall/FallMain.scala | 71 +++++++---- .../cnam/study/fall/FallMainExtract.scala | 42 +++++-- .../cnam/study/fall/FallMainTransform.scala | 9 +- .../cnam/study/fall/config/FallConfig.scala | 5 +- .../study/fall/extractors/ActsExtractor.scala | 17 ++- .../fall/extractors/DiagnosisExtractor.scala | 6 +- .../FallHospitalStayExtractor.scala | 62 +++++++++ .../extractors/McoDiagnosisExtractor.scala | 47 ------- .../fall/fractures/FracturesTransformer.scala | 6 +- .../fractures/HospitalizedFractures.scala | 58 ++++++++- .../fall/fractures/LiberalFractures.scala | 2 +- .../PrivateAmbulatoryFractures.scala | 2 +- .../fractures/PublicAmbulatoryFractures.scala | 2 +- .../cnam/study/fall/fractures/Surgery.scala | 2 +- .../FallMainExtractorTransformSuite.scala | 2 +- .../study/fall/config/FallConfigSuite.scala | 2 +- .../fractures/FracturesTransformerSuite.scala | 26 ++-- .../HospitalizedFracturesSuite.scala | 119 ++++++++++++++---- .../fractures/LiberalFracturesSuite.scala | 8 +- .../PrivateAmbulatoryFracturesSuite.scala | 7 +- .../PublicAmbulatoryFracturesSuite.scala | 7 +- 22 files changed, 354 insertions(+), 150 deletions(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala diff --git a/src/main/resources/config/fall/default.conf b/src/main/resources/config/fall/default.conf index ad576c14..d3a30116 100644 --- a/src/main/resources/config/fall/default.conf +++ b/src/main/resources/config/fall/default.conf @@ -29,7 +29,7 @@ root { fall_frame: 0 months // fractures are grouped if they happen in the same site within the period fallFrame, (default value 0 means no group) } run_parameters { - outcome: ["Acts", "Diagnoses", "Outcomes"] // pipeline of calculation of outcome, possible values : Acts, Diagnoses, and Outcomes + outcome: 
["Acts", "Diagnoses", "HospitalDeaths", "Outcomes"] // pipeline of calculation of outcome, possible values : Acts, Diagnoses, and Outcomes exposure: ["Patients", "StartGapPatients", "DrugPurchases", "Exposures"] // pipeline of the calculation of exposure, possible values : Patients, StartGapPatients, DrugPurchases, Exposures } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala index 8b5fc199..04360c65 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala @@ -6,6 +6,7 @@ import scala.collection.mutable import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event, FollowUp, Outcome} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters @@ -41,9 +42,7 @@ object FallMain extends Main with FractureCodes { val dcir = sources.dcir.get.repartition(4000).persist() val mco = sources.mco.get.repartition(4000).persist() - val operationsMetadata = computeControls(sources, fallConfig) ++ - computeExposures(sources, fallConfig) ++ - computeOutcomes(sources, fallConfig) + val operationsMetadata = computeOutcomes(sources, fallConfig) dcir.unpersist() mco.unpersist() @@ -240,10 +239,7 @@ object FallMain extends Main with FractureCodes { fallConfig.output.saveMode ) } - - } - operationsMetadata } @@ -252,28 +248,28 @@ object FallMain extends Main with FractureCodes { val operationsMetadata = mutable.Buffer[OperationMetadata]() val optionDiagnoses = if (fallConfig.runParameters.diagnoses) { - logger.info("diagnoses") val diagnoses = new 
DiagnosisExtractor(fallConfig.diagnoses).extract(sources).persist() val diagnosesPopulation = DiagnosisCounter.process(diagnoses) operationsMetadata += { OperationReporter.reportDataAndPopulationAsDataSet( - "diagnoses", - List("MCO", "IR_IMB_R"), - OperationTypes.Diagnosis, - diagnoses, - diagnosesPopulation, - Path(fallConfig.output.outputSavePath), - fallConfig.output.saveMode - ) + "diagnoses", + List("MCO", "IR_IMB_R"), + OperationTypes.Diagnosis, + diagnoses, + diagnosesPopulation, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) } Some(diagnoses) } else { None } - val (optionActs, optionLiberalActs) = if (fallConfig.runParameters.acts) { - logger.info("Medical Acts") - val acts = new ActsExtractor(fallConfig.medicalActs).extract(sources).persist() + val (optionActs, optionLiberalActs, optionSurgeries) = if (fallConfig.runParameters.acts) { + val (acts, surgeries) = new ActsExtractor(fallConfig.medicalActs).extract(sources) + acts.cache() + surgeries.cache() operationsMetadata += { OperationReporter .report( @@ -285,7 +281,17 @@ object FallMain extends Main with FractureCodes { fallConfig.output.saveMode ) } - logger.info("Liberal Medical Acts") + operationsMetadata += { + OperationReporter + .report( + "fracture_surgeries", + List("MCO"), + OperationTypes.MedicalActs, + surgeries.toDF, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + } val liberalActs = LiberalActsTransformer.transform(acts).persist() operationsMetadata += { OperationReporter @@ -298,20 +304,36 @@ object FallMain extends Main with FractureCodes { fallConfig.output.saveMode ) } - (Some(acts), Some(liberalActs)) + (Some(acts), Some(liberalActs), Some(surgeries)) + } else { + (None, None, None) + } + val optionHospitalDeaths = if (fallConfig.runParameters.hospitalDeaths) { + val hospitalDeaths = new FallHospitalStayExtractor(SimpleExtractorCodes(List(Death.value))).extract(sources) + operationsMetadata += { + OperationReporter + .report( + 
"hospital_deaths", + List("MCO"), + OperationTypes.HospitalStays, + hospitalDeaths.toDF, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + } + Some(hospitalDeaths) } else { - (None, None) + None } if (fallConfig.runParameters.outcomes) { - logger.info("Fractures") val fractures: Dataset[Event[Outcome]] = new FracturesTransformer(fallConfig) - .transform(optionLiberalActs.get, optionActs.get, optionDiagnoses.get) + .transform(optionLiberalActs.get, optionActs.get, optionDiagnoses.get, optionSurgeries.get, optionHospitalDeaths.get) operationsMetadata += { OperationReporter .report( "fractures", - List("acts"), + List("acts", "diagnoses"), OperationTypes.Outcomes, fractures.toDF, Path(fallConfig.output.outputSavePath), @@ -319,7 +341,6 @@ object FallMain extends Main with FractureCodes { ) } } - operationsMetadata } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala index 25e5adc0..04c6089c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala @@ -6,6 +6,7 @@ import scala.collection.mutable import org.apache.spark.sql.{Dataset, SQLContext} import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events.DcirAct +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits @@ -114,10 +115,9 @@ object FallMainExtract extends Main with FractureCodes { mutable.HashMap[String, OperationMetadata] = { if (fallConfig.runParameters.diagnoses) { - logger.info("diagnoses") val diagnoses = new DiagnosisExtractor(fallConfig.diagnoses).extract(sources).persist() val 
diagnosesPopulation = DiagnosisCounter.process(diagnoses) - val diagnoses_report = OperationReporter.reportDataAndPopulationAsDataSet( + val diagnosesReport = OperationReporter.reportDataAndPopulationAsDataSet( "diagnoses", List("MCO", "IR_IMB_R"), OperationTypes.Diagnosis, @@ -127,14 +127,15 @@ object FallMainExtract extends Main with FractureCodes { fallConfig.output.saveMode ) meta += { - diagnoses_report.name -> diagnoses_report + diagnosesReport.name -> diagnosesReport } } if (fallConfig.runParameters.acts) { - logger.info("Medical Acts") - val acts = new ActsExtractor(fallConfig.medicalActs).extract(sources).persist() - val acts_report = OperationReporter.reportAsDataSet( + val (acts, surgeries) = new ActsExtractor(fallConfig.medicalActs).extract(sources) + acts.persist() + surgeries.persist() + val actsReport = OperationReporter.reportAsDataSet( "acts", List("DCIR", "MCO", "MCO_CE"), OperationTypes.MedicalActs, @@ -143,9 +144,21 @@ object FallMainExtract extends Main with FractureCodes { fallConfig.output.saveMode ) meta += { - acts_report.name -> acts_report + actsReport.name -> actsReport } - logger.info("Liberal Medical Acts") + + val surgeriesReport = OperationReporter.reportAsDataSet( + "surgeries", + List("MCO"), + OperationTypes.MedicalActs, + surgeries, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + meta += { + surgeriesReport.name -> surgeriesReport + } + val liberalActs = acts .filter(act => act.groupID == DcirAct.groupID.Liberal && !CCAMExceptions.contains(act.value)).persist() val liberal_acts_report = OperationReporter.reportAsDataSet( @@ -159,6 +172,19 @@ object FallMainExtract extends Main with FractureCodes { meta += { liberal_acts_report.name -> liberal_acts_report } + + val hospitalDeaths = new FallHospitalStayExtractor(SimpleExtractorCodes(List(Death.value))).extract(sources) + val hospitalDeathsReport = OperationReporter.reportAsDataSet( + "hospital_deaths", + List("MCO"), + OperationTypes.HospitalStays, + 
hospitalDeaths, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + meta += { + hospitalDeathsReport.name -> hospitalDeathsReport + } } meta } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala index 8968a115..7626e0d6 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala @@ -190,10 +190,15 @@ object FallMainTransform extends Main with FractureCodes { val liberalActs = spark.read.parquet(meta("liberal_acts").outputPath) .as[Event[MedicalAct]] + val surgeries = spark.read.parquet(meta("surgeries").outputPath) + .as[Event[MedicalAct]] + + val hospitalDeaths = spark.read.parquet(meta("hospital_deaths").outputPath) + .as[Event[HospitalStay]] + if (fallConfig.runParameters.outcomes) { - logger.info("Fractures") val fractures: Dataset[Event[Outcome]] = new FracturesTransformer(fallConfig) - .transform(liberalActs, acts, diagnoses) + .transform(liberalActs, acts, diagnoses, surgeries, hospitalDeaths) val fractures_report = OperationReporter.reportAsDataSet( "fractures", List("acts"), diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala index e9f8e297..ec97c355 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfig.scala @@ -100,7 +100,7 @@ object FallConfig extends FallConfigLoader with FractureCodes { /** Parameters if run the calculation of outcome or exposure **/ case class RunConfig( - outcome: List[String] = List("Acts", "Diagnoses", "Outcomes"), + outcome: List[String] = List("Acts", "Diagnoses", "HospitalDeaths", "Outcomes"), exposure: List[String] = List("Patients", 
"StartGapPatients", "DrugPurchases", "Exposures"), hospitalStay: List[String] = List("HospitalStay")) { //exposures @@ -111,7 +111,8 @@ object FallConfig extends FallConfigLoader with FractureCodes { //outcomes val diagnoses: Boolean = outcome contains "Diagnoses" val acts: Boolean = outcome contains "Acts" - val outcomes: Boolean = List("Diagnoses", "Acts", "Outcomes").forall(outcome.contains) + val hospitalDeaths: Boolean = outcome contains "HospitalDeaths" + val outcomes: Boolean = List("Diagnoses", "Acts", "HospitalDeaths", "Outcomes").forall(outcome.contains) // Hospital Stays val hospitalStays: Boolean = hospitalStay contains "HospitalStay" } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala index 48590593..1f0041dc 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/ActsExtractor.scala @@ -7,17 +7,26 @@ import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, Event, MedicalAct} import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.events.acts._ import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.study.fall.fractures.Surgery import fr.polytechnique.cmap.cnam.util.functions.unionDatasets -class ActsExtractor(config: MedicalActsConfig) { - def extract(sources: Sources): Dataset[Event[MedicalAct]] = { +class ActsExtractor(config: MedicalActsConfig) extends Serializable { + def extract(sources: Sources): (Dataset[Event[MedicalAct]], Dataset[Event[MedicalAct]]) = { val dcirMedicalAct = DcirMedicalActExtractor(SimpleExtractorCodes(config.dcirCodes)).extract(sources) .filter(act => act.groupID != DcirAct.groupID.Unknown) // filter out unknown source acts .filter(act => act.groupID != DcirAct.groupID.PublicAmbulatory) 
//filter out public amb val mcoCEMedicalActs = McoCeCcamActExtractor(SimpleExtractorCodes(config.mcoCECodes)).extract(sources) - val mcoMedicalActs = McoCcamActExtractor(SimpleExtractorCodes(config.mcoCCAMCodes)).extract(sources) - unionDatasets(dcirMedicalAct, mcoCEMedicalActs, mcoMedicalActs) + val surgeryCodes = Surgery.surgeryCodes + val ccamCodes = config.mcoCCAMCodes + val allMcoActs = McoCcamActExtractor(SimpleExtractorCodes(ccamCodes ++ surgeryCodes)) + .extract(sources) + .cache() + + val fractureSurgeries = allMcoActs.filter(md => surgeryCodes.exists(code => code.startsWith(md.value))) + val mcoMedicalActs = allMcoActs.filter(md => ccamCodes.exists(code => code.startsWith(md.value))) + + (unionDatasets(dcirMedicalAct, mcoCEMedicalActs, mcoMedicalActs), fractureSurgeries) } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala index d8d6edff..218281a7 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/DiagnosisExtractor.scala @@ -13,9 +13,9 @@ class DiagnosisExtractor(config: DiagnosesConfig) { def extract(sources: Sources): Dataset[Event[Diagnosis]] = { - val mainDiag = MainDiagnosisFallExtractor(SimpleExtractorCodes(config.dpCodes)).extract(sources) - val linkedDiag = LinkedDiagnosisFallExtractor(SimpleExtractorCodes(config.drCodes)).extract(sources) - val dasDiag = AssociatedDiagnosisFallExtractor(SimpleExtractorCodes(config.daCodes)).extract(sources) + val mainDiag = McoMainDiagnosisExtractor(SimpleExtractorCodes(config.dpCodes)).extract(sources) + val linkedDiag = McoLinkedDiagnosisExtractor(SimpleExtractorCodes(config.drCodes)).extract(sources) + val dasDiag = McoAssociatedDiagnosisExtractor(SimpleExtractorCodes(config.daCodes)).extract(sources) unionDatasets(mainDiag, linkedDiag, dasDiag) } 
diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractor.scala new file mode 100644 index 00000000..fd54690f --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractor.scala @@ -0,0 +1,62 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.fall.extractors + +import java.sql.{Date, Timestamp} +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.events.{EventBuilder, HospitalStay, McoHospitalStay} +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mco.McoSimpleExtractor +import fr.polytechnique.cmap.cnam.etl.extractors.IsInStrategy + +class FallHospitalStayExtractor(codes: SimpleExtractorCodes) extends McoSimpleExtractor[HospitalStay] + with IsInStrategy[HospitalStay] { + val exitCodes: (String) => (ExitMode) = { + case "0" => TransferAct + case "6" => Mutation + case "7" => Transfer + case "8" => Home + case "9" => Death + case _ => Unknown + } + + override def getCodes: SimpleExtractorCodes = codes + + override def columnName: String = ColNames.ExitMode + + override def eventBuilder: EventBuilder = McoHospitalStay + + override def neededColumns: List[String] = List(ColNames.EndDate, ColNames.ExitMode) ++ super.usedColumns + + override def extractEnd(r: Row): Option[Timestamp] = Some(new Timestamp(r.getAs[Date](ColNames.EndDate).getTime)) + + override def extractValue(row: Row): String = exitCodes(row.getAs[String](columnName)).value +} + +sealed trait ExitMode extends Serializable { + def value: String +} + +object Death extends ExitMode { + override def value: String = "death" +} + +object Mutation extends ExitMode { + override def value: String = "mutation" +} + +object Transfer extends ExitMode { + override def value: String = "transfer" +} + 
+object Home extends ExitMode { + override def value: String = "home" +} + +object TransferAct extends ExitMode { + override def value: String = "transfer_act" +} + +object Unknown extends ExitMode { + override def value: String = "unknown" +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala deleted file mode 100644 index 18cbb4db..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/McoDiagnosisExtractor.scala +++ /dev/null @@ -1,47 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.study.fall.extractors - -import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.{McoAssociatedDiagnosisExtractor, McoLinkedDiagnosisExtractor, McoMainDiagnosisExtractor} -import fr.polytechnique.cmap.cnam.etl.extractors.sources.mco.McoRowExtractor -import fr.polytechnique.cmap.cnam.study.fall.fractures.Surgery - -trait ClassifyWeight extends Surgery { - self : McoRowExtractor => - /** It gets ExitMode from row. - * - * @param r The row itself. - * @return The value of ExitMode. 
- */ - def getExit(r: Row): String = r.getAs[String](ColNames.ExitMode) - - override def extractWeight(r: Row): Double = { - if (!r.isNullAt(r.fieldIndex(ColNames.ExitMode)) && getExit(r).equals("9")) { - 4 - } else if (!r.isNullAt(r.fieldIndex(ColNames.CCAM)) && surgeryCodes.contains(r.getAs[String](ColNames.CCAM))) { - 3 - } else { - 2 - } - } -} - -class MainDiagnosisFallExtractor(codes: SimpleExtractorCodes) extends McoMainDiagnosisExtractor(codes) with ClassifyWeight - -object MainDiagnosisFallExtractor { - def apply(codes: SimpleExtractorCodes): MainDiagnosisFallExtractor = new MainDiagnosisFallExtractor(codes) -} - -class AssociatedDiagnosisFallExtractor(codes: SimpleExtractorCodes) extends McoAssociatedDiagnosisExtractor(codes) with ClassifyWeight - -object AssociatedDiagnosisFallExtractor { - def apply(codes: SimpleExtractorCodes): AssociatedDiagnosisFallExtractor = new AssociatedDiagnosisFallExtractor(codes) -} - -class LinkedDiagnosisFallExtractor(codes: SimpleExtractorCodes) extends McoLinkedDiagnosisExtractor(codes) with ClassifyWeight - -object LinkedDiagnosisFallExtractor { - def apply(codes: SimpleExtractorCodes): LinkedDiagnosisFallExtractor = new LinkedDiagnosisFallExtractor(codes) -} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformer.scala index b9aebb1e..7054287d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformer.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformer.scala @@ -22,12 +22,16 @@ class FracturesTransformer(config: FallConfig) extends OutcomesTransformer with def transform( liberalActs: Dataset[Event[MedicalAct]], acts: Dataset[Event[MedicalAct]], - diagnoses: Dataset[Event[Diagnosis]]): Dataset[Event[Outcome]] = { + diagnoses: Dataset[Event[Diagnosis]], + surgeries: 
Dataset[Event[MedicalAct]], + hospitalDeaths: Dataset[Event[HospitalStay]]): Dataset[Event[Outcome]] = { // Hospitalized fractures val hospitalizedFractures = HospitalizedFractures.transform( diagnoses, acts.filter(_.category == McoCCAMAct.category), + hospitalDeaths, + surgeries, config.sites.sites ) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFractures.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFractures.scala index ec8af670..1dc314e2 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFractures.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFractures.scala @@ -2,11 +2,13 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures -import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.Dataset import org.apache.spark.sql.functions._ import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.transformers.outcomes.OutcomesTransformer import fr.polytechnique.cmap.cnam.study.fall.codes.FractureCodes +import fr.polytechnique.cmap.cnam.study.fall.extractors.Death +import fr.polytechnique.cmap.cnam.util.functions.unionDatasets /* * The rules for this Outcome definition can be found on the following page: @@ -76,10 +78,8 @@ object HospitalizedFractures extends OutcomesTransformer with FractureCodes { * @param followUpStaysForFractures Dataset of hospital stays for followup of fractures. 
* @return */ - def filterDiagnosisForFracturesFollowUp( - fracturesDiagnoses: Dataset[Event[Diagnosis]], - followUpStaysForFractures: Dataset[HospitalStayID] - ): Dataset[Event[Diagnosis]] = { + def filterDiagnosisForFracturesFollowUp(followUpStaysForFractures: Dataset[HospitalStayID]) + (fracturesDiagnoses: Dataset[Event[Diagnosis]]): Dataset[Event[Diagnosis]] = { import fracturesDiagnoses.sparkSession.implicits._ fracturesDiagnoses .joinWith( @@ -92,9 +92,53 @@ object HospitalizedFractures extends OutcomesTransformer with FractureCodes { .map(_._1) } + def getFourthLevelSeverity(stays: Dataset[Event[HospitalStay]]) + (diagnoses: Dataset[Event[Diagnosis]]): Dataset[Event[Diagnosis]] = { + import stays.sparkSession.implicits._ + diagnoses.joinWith( + stays.filter(_.value == Death.value), + diagnoses(Event.Columns.PatientID) === stays(Event.Columns.PatientID) + && diagnoses(Event.Columns.GroupID) === stays(Event.Columns.GroupID), + "inner" + ) + .map(_._1) + } + + def getThirdLevelSeverity(surgeries: Dataset[Event[MedicalAct]]) + (diagnoses: Dataset[Event[Diagnosis]]): Dataset[Event[Diagnosis]] = { + import surgeries.sparkSession.implicits._ + diagnoses.joinWith( + surgeries, + diagnoses(Event.Columns.PatientID) === surgeries(Event.Columns.PatientID) + && diagnoses(Event.Columns.GroupID) === surgeries(Event.Columns.GroupID), + "inner" + ) + .map(_._1) + } + + def assignSeverityToDiagnosis(stays: Dataset[Event[HospitalStay]], surgeries: Dataset[Event[MedicalAct]]) + (diagnoses: Dataset[Event[Diagnosis]]): Dataset[Event[Diagnosis]] = { + + val fourthLevelSeverity = diagnoses.transform(getFourthLevelSeverity(stays)).cache() + + val notFourthLevel = diagnoses.except(fourthLevelSeverity).cache() + val thirdLevelSeverity = notFourthLevel + .transform(getThirdLevelSeverity(surgeries)).cache() + + val secondLevelSeverity = notFourthLevel.except(thirdLevelSeverity) + import surgeries.sparkSession.implicits._ + unionDatasets( + fourthLevelSeverity.map(_.copy(weight = 4D)), + 
thirdLevelSeverity.map(_.copy(weight = 3D)), + secondLevelSeverity.map(_.copy(weight = 2D)) + ) + } + def transform( diagnoses: Dataset[Event[Diagnosis]], acts: Dataset[Event[MedicalAct]], + stays: Dataset[Event[HospitalStay]], + surgeries: Dataset[Event[MedicalAct]], ghmSites: List[BodySite] ): Dataset[Event[Outcome]] = { @@ -105,7 +149,9 @@ object HospitalizedFractures extends OutcomesTransformer with FractureCodes { val fractureFollowUpHospitalStays = getFractureFollowUpStays(acts) - filterDiagnosisForFracturesFollowUp(diagnosisWithDP, fractureFollowUpHospitalStays) + diagnosisWithDP + .transform(filterDiagnosisForFracturesFollowUp(fractureFollowUpHospitalStays)) + .transform(assignSeverityToDiagnosis(stays, surgeries)) .map( event => Outcome( event.patientID, diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/LiberalFractures.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/LiberalFractures.scala index 7b1979bc..6aaf12ad 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/LiberalFractures.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/LiberalFractures.scala @@ -17,7 +17,7 @@ object LiberalFractures extends OutcomesTransformer { .map( event => { val fractureSite = BodySite.getSiteFromCode(event.value, BodySites.sites, CodeType.CCAM) - Outcome(event.patientID, fractureSite, outcomeName, event.weight, event.start) + Outcome(event.patientID, fractureSite, outcomeName, 1.0D, event.start) } ) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PrivateAmbulatoryFractures.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PrivateAmbulatoryFractures.scala index 0411589c..ab877eb3 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PrivateAmbulatoryFractures.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PrivateAmbulatoryFractures.scala @@ -25,7 +25,7 @@ object 
PrivateAmbulatoryFractures extends OutcomesTransformer with FractureCodes .map( event => { val fractureSite = BodySite.getSiteFromCode(event.value, BodySites.sites, CodeType.CCAM) - Outcome(event.patientID, fractureSite, outcomeName, event.weight, event.start) + Outcome(event.patientID, fractureSite, outcomeName, 1.0D, event.start) } ) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFractures.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFractures.scala index 07380a3b..88ab8b6b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFractures.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFractures.scala @@ -20,7 +20,7 @@ object PublicAmbulatoryFractures extends OutcomesTransformer with FractureCodes .map( event => { val fractureSite = BodySite.getSiteFromCode(event.value, BodySites.sites, CodeType.CCAM) - Outcome(event.patientID, fractureSite, outcomeName, event.weight, event.start) + Outcome(event.patientID, fractureSite, outcomeName, 1.0D, event.start) } ) } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/Surgery.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/Surgery.scala index b146e699..c0b050d1 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/Surgery.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/Surgery.scala @@ -2,7 +2,7 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures -trait Surgery { +object Surgery { val surgeryCodes = Set( "QAGA004", "QZGA003", diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtractorTransformSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtractorTransformSuite.scala index 7a2c5064..90cb6fad 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtractorTransformSuite.scala +++ 
b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtractorTransformSuite.scala @@ -33,7 +33,7 @@ class FallMainExtractorTransformSuite extends SharedContext { assertDSs(new DiagnosisExtractor(fallConfig.diagnoses).extract(sources), spark.read.parquet(meta.get("diagnoses").get.outputPath) .as(Encoders.bean(classOf[Event[Diagnosis]]))) - assertDSs(new ActsExtractor(fallConfig.medicalActs).extract(sources), + assertDSs(new ActsExtractor(fallConfig.medicalActs).extract(sources)._1, spark.read.parquet(meta.get("acts").get.outputPath) .as(Encoders.bean(classOf[Event[MedicalAct]]))) assertDSs(new Patients(PatientsConfig(fallConfig.base.studyStart)).extract(sources), diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala index 02930d57..ba2ff1c8 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/config/FallConfigSuite.scala @@ -80,7 +80,7 @@ class FallConfigSuite extends AnyFlatSpec { | sites: ["BodySites"] | } | run_parameters { - | outcome: ["Acts", "Diagnoses", "Outcomes"] // pipeline of calculation of outcome, possible values : Acts, Diagnoses, and Outcomes + | outcome: ["Acts", "Diagnoses", "HospitalDeaths", "Outcomes"] // pipeline of calculation of outcome, possible values : Acts, Diagnoses, and Outcomes | exposure: ["Patients", "DrugPurchases", "Exposures"] // pipeline of the calculation of exposure, possible values : Patients, StartGapPatients, DrugPurchases, Exposures | } | """.trim.stripMargin diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformerSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformerSuite.scala index 289434f7..449cf5a9 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformerSuite.scala 
+++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/FracturesTransformerSuite.scala @@ -9,6 +9,7 @@ import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.study.fall.FallMain.CCAMExceptions import fr.polytechnique.cmap.cnam.study.fall.config.FallConfig import fr.polytechnique.cmap.cnam.study.fall.config.FallConfig.FracturesConfig +import fr.polytechnique.cmap.cnam.study.fall.extractors.Death import fr.polytechnique.cmap.cnam.util.functions.makeTS @@ -24,10 +25,10 @@ class FracturesTransformerSuite extends SharedContext { val testConf = defaultConf.copy(outcomes = FracturesConfig(fallFrame = 3.months)) val acts: Dataset[Event[MedicalAct]] = Seq( //pubic ambulatory acts - McoCeCcamAct("georgette", DcirAct.groupID.PublicAmbulatory, "MZMP007", 1.0, makeTS(2010, 2, 6)), - McoCeCcamAct("georgettebis", DcirAct.groupID.PublicAmbulatory, "MZMP007", 1.0, makeTS(2010, 2, 6)), - McoCeCcamAct("george", DcirAct.groupID.PublicAmbulatory, "whatever", 1.0, makeTS(2010, 2, 6)), - DcirAct("john", DcirAct.groupID.PublicAmbulatory, "MZMP007", 1.0, makeTS(2010, 2, 6)), + McoCeCcamAct("georgette", McoCeCcamAct.category, "MZMP007", 1.0, makeTS(2010, 2, 6)), + McoCeCcamAct("georgettebis", McoCeCcamAct.category, "MZMP007", 1.0, makeTS(2010, 2, 6)), + McoCeCcamAct("george", McoCeCcamAct.category, "whatever", 1.0, makeTS(2010, 2, 6)), + DcirAct("john", McoCeCcamAct.category, "MZMP007", 1.0, makeTS(2010, 2, 6)), //private ambulatory acts DcirAct("riri", DcirAct.groupID.PrivateAmbulatory, "NBEP002", 1.0, makeTS(2007, 1, 1)), DcirAct("fifi", DcirAct.groupID.PrivateAmbulatory, "stupidcode", 1.0, makeTS(2007, 1, 1)), @@ -47,18 +48,26 @@ class FracturesTransformerSuite extends SharedContext { McoMainDiagnosis("emile", "3", "S222", 2.0, makeTS(2017, 7, 18)), McoMainDiagnosis("emile", "3", "S222", 3.0, makeTS(2017, 7, 18)), McoMainDiagnosis("emile", "3", "S222", 4.0, makeTS(2017, 7, 18)), - McoMainDiagnosis("kevin", "BassinRachis", "S327", 3.0, makeTS(2017, 
7, 18)), + McoMainDiagnosis("kevin", "1", "S327", 3.0, makeTS(2017, 7, 18)), McoMainDiagnosis("jean", "4", "S120", 4.0, makeTS(2017, 7, 18)), McoMainDiagnosis("Paul", "1", "S42.54678", makeTS(2017, 7, 20)), - McoMainDiagnosis("Paul", "7", "hemorroides", makeTS(2017, 1, 2)), + McoMainDiagnosis("Paul", "7", "S42.54678", makeTS(2017, 1, 2)), McoAssociatedDiagnosis("Jacques", "8", "qu'est-ce-que tu fais là?", makeTS(2017, 7, 18)) ).toDS + val surgeries: Dataset[Event[MedicalAct]] = Seq[Event[MedicalAct]]( + McoCCAMAct("kevin", "1", "NHDA007", makeTS(2017, 7, 18)) + ).toDS() + + val hospitalDeaths: Dataset[Event[HospitalStay]] = Seq[Event[HospitalStay]]( + HospitalStay("emile", "3", Death.value, 0D, makeTS(2017, 7, 18), Some(makeTS(2017, 7, 18))) + ).toDS() + val expectedOutcomes = Seq( //hospitalization Outcome("emile", "Ribs", "hospitalized_fall", 4.0, makeTS(2017, 7, 18)), Outcome("kevin", "BassinRachis", "hospitalized_fall", 3.0, makeTS(2017, 7, 18)), - Outcome("jean", "Rachis", "hospitalized_fall", 4.0, makeTS(2017, 7, 18)), + Outcome("jean", "Rachis", "hospitalized_fall", 2.0, makeTS(2017, 7, 18)), //private ambulatory Outcome("riri", "FemurExclusionCol", PrivateAmbulatoryFractures.outcomeName, 1.0, makeTS(2007, 1, 1)), //public ambulatory @@ -69,11 +78,10 @@ class FracturesTransformerSuite extends SharedContext { Outcome("Ben", "MembreSuperieurDistal", "Liberal", 1.0, makeTS(2017, 7, 18)), Outcome("Beni", "MembreSuperieurDistal", "Liberal", 1.0, makeTS(2017, 7, 18)), Outcome("Sam", "CraneFace", "Liberal", 1.0, makeTS(2015, 7, 18)) - ).toDS //When - val result = new FracturesTransformer(testConf).transform(liberalActs, acts, diagnoses) + val result = new FracturesTransformer(testConf).transform(liberalActs, acts, diagnoses, surgeries, hospitalDeaths) //Then assertDSs(result, expectedOutcomes) diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFracturesSuite.scala 
b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFracturesSuite.scala index bb4995dc..7082e058 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFracturesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/HospitalizedFracturesSuite.scala @@ -5,6 +5,7 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events.{Outcome, _} +import fr.polytechnique.cmap.cnam.study.fall.extractors.{Death, Mutation, Transfer} import fr.polytechnique.cmap.cnam.util.functions._ @@ -81,66 +82,132 @@ class HospitalizedFracturesSuite extends SharedContext { ).toDS // When - val result = HospitalizedFractures.filterDiagnosisForFracturesFollowUp(input, badStays) + val result = HospitalizedFractures.filterDiagnosisForFracturesFollowUp(badStays)(input) // Then assertDSs(result, expected) } - "transform" should "return Fractures Event Dataset based on the algorithm" in { + "getFourthLevelSeverity" should "return diagnosis where then patient died at the end of the same hospital stay" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val diagnoses = Seq( - McoMainDiagnosis("Pierre", "3", "S02.42", makeTS(2017, 7, 18)), - McoMainDiagnosis("Paul", "1", "S42.54678", makeTS(2017, 7, 20)), - McoMainDiagnosis("Paul", "7", "S02.42", makeTS(2017, 1, 2)), - McoAssociatedDiagnosis("Jacques", "8", "qu'est-ce-que tu fais là?", makeTS(2017, 7, 18)) + val input: Dataset[Event[Diagnosis]] = List( + McoMainDiagnosis("Paul", "1", "hemorroides", makeTS(2017, 7, 20)), + McoMainDiagnosis("Pierre", "3", "jambe cassée", makeTS(2017, 7, 18)), + McoMainDiagnosis("Pierre", "4", "jambe cassée", makeTS(2016, 7, 18)) ).toDS - val medicalActs = Seq( - McoCCAMAct("Paul", "1", "LJGA001", makeTS(2017, 7, 20)) + val stays: Dataset[Event[HospitalStay]] = List[Event[HospitalStay]]( + 
McoHospitalStay("Paul", "1", Death.value, 8.0D, makeTS(2017, 7, 20), Some(makeTS(2017, 7, 20))), + McoHospitalStay("Pierre", "3", Mutation.value, 8.0D, makeTS(2017, 7, 18), Some(makeTS(2017, 7, 18))), + McoHospitalStay("Pierre", "4", Transfer.value, 8.0D, makeTS(2016, 7, 18), Some(makeTS(2016, 7, 18))) + ).toDS() + + val expected: Dataset[Event[Diagnosis]] = List( + McoMainDiagnosis("Paul", "1", "hemorroides", makeTS(2017, 7, 20)) ).toDS - val expected = Seq( - Outcome("Pierre", "AllSites", "hospitalized_fall", makeTS(2017, 7, 18)), - Outcome("Paul", "AllSites", "hospitalized_fall", makeTS(2017, 1, 2)) + // When + val result = HospitalizedFractures.getFourthLevelSeverity(stays)(input) + + // Then + assertDSs(result, expected) + } + + "getThirdLevel" should "return diagnosis where the patient did have a surgery during the same stay" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val input: Dataset[Event[Diagnosis]] = List( + McoMainDiagnosis("Paul", "1", "hemorroides", makeTS(2017, 7, 20)), + McoMainDiagnosis("Pierre", "3", "jambe cassée", makeTS(2017, 7, 18)), + McoMainDiagnosis("Pierre", "4", "jambe cassée", makeTS(2016, 7, 18)) + ).toDS + + val surgeries: Dataset[Event[MedicalAct]] = List[Event[MedicalAct]]( + McoCCAMAct("Pierre", "5", "jambe", 8.0D, makeTS(2017, 7, 18), Some(makeTS(2017, 7, 18))), + McoCCAMAct("Pierre", "4", "test", 8.0D, makeTS(2016, 7, 18), Some(makeTS(2016, 7, 18))) + ).toDS() + + val expected: Dataset[Event[Diagnosis]] = List( + McoMainDiagnosis("Pierre", "4", "jambe cassée", makeTS(2016, 7, 18)) ).toDS // When - val result = HospitalizedFractures.transform(diagnoses, medicalActs, List(AllSites)) + val result = HospitalizedFractures.getThirdLevelSeverity(surgeries)(input) + // Then assertDSs(result, expected) } - "transform" should "return correct weight" in { + "assignSeverityToDiagnosis" should "assign a weight to a Diagnosis based on stays and surgeries of the patient" in { + val sqlCtx = sqlContext + import 
sqlCtx.implicits._ + + // Given + val input: Dataset[Event[Diagnosis]] = List( + McoMainDiagnosis("Paul", "1", "ColDuFemur", makeTS(2017, 7, 20)), + McoMainDiagnosis("Pierre", "3", "Coude", makeTS(2017, 7, 18)), + McoMainDiagnosis("Pierre", "4", "Poignet", makeTS(2016, 7, 18)) + ).toDS + + val surgeries: Dataset[Event[MedicalAct]] = List[Event[MedicalAct]]( + McoCCAMAct("Pierre", "4", "test", 8.0D, makeTS(2016, 7, 18), None) + ).toDS() + + val stays: Dataset[Event[HospitalStay]] = List[Event[HospitalStay]]( + McoHospitalStay("Paul", "1", Death.value, 8.0D, makeTS(2017, 7, 20), Some(makeTS(2017, 7, 20))) + ).toDS() + + val expected: Dataset[Event[Diagnosis]] = List( + McoMainDiagnosis("Paul", "1", "ColDuFemur", 4D, makeTS(2017, 7, 20), None), + McoMainDiagnosis("Pierre", "3", "Coude", 2D, makeTS(2017, 7, 18)), + McoMainDiagnosis("Pierre", "4", "Poignet", 3D, makeTS(2016, 7, 18)) + ).toDS + + // When + val result = HospitalizedFractures.assignSeverityToDiagnosis(stays, surgeries)(input) + + // Then + assertDSs(result, expected) + } + + "transform" should "return Fractures Event Dataset based on the algorithm" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given val diagnoses = Seq( - McoMainDiagnosis("Pierre", "3", "S02.42", 2.0, makeTS(2017, 7, 18)), - McoMainDiagnosis("Jean", "2", "S02.42", 3.0, makeTS(2017, 7, 18)), - McoMainDiagnosis("Kevin", "4", "S02.42", 4.0, makeTS(2017, 7, 18)), - McoMainDiagnosis("Paul", "1", "S42.54678", 2.0, makeTS(2017, 7, 20)), - McoMainDiagnosis("Paul", "7", "S42.54678", 2.0, makeTS(2017, 1, 2)), - McoAssociatedDiagnosis("Jacques", "8", "qu'est-ce-que tu fais là?", 2.0, makeTS(2017, 7, 18)) + McoMainDiagnosis("Pierre", "3", "S02.42", makeTS(2017, 7, 18)), + McoMainDiagnosis("Paul", "1", "S42.54678", makeTS(2017, 7, 20)), + McoMainDiagnosis("Paul", "7", "S02.42", makeTS(2017, 1, 2)), + McoMainDiagnosis("Charlotte", "9", "S02.42", makeTS(2017, 10, 22)), + McoAssociatedDiagnosis("Jacques", "8", "qu'est-ce-que tu fais là?", 
makeTS(2017, 7, 18)) ).toDS + val surgeries: Dataset[Event[MedicalAct]] = List[Event[MedicalAct]]( + McoCCAMAct("Pierre", "3", "test", 8.0D, makeTS(2016, 7, 18), None) + ).toDS() + + val stays: Dataset[Event[HospitalStay]] = List[Event[HospitalStay]]( + McoHospitalStay("Paul", "7", Death.value, 8.0D, makeTS(2017, 1, 2), Some(makeTS(2017, 1, 3))) + ).toDS() + val medicalActs = Seq( McoCCAMAct("Paul", "1", "LJGA001", makeTS(2017, 7, 20)) ).toDS - val expected = Seq( - Outcome("Pierre", "AllSites", "hospitalized_fall", 2.0, makeTS(2017, 7, 18)), - Outcome("Jean", "AllSites", "hospitalized_fall", 3.0, makeTS(2017, 7, 18)), - Outcome("Kevin", "AllSites", "hospitalized_fall", 4.0, makeTS(2017, 7, 18)), - Outcome("Paul", "AllSites", "hospitalized_fall", 2.0, makeTS(2017, 1, 2)) + val expected: Dataset[Event[Outcome]] = Seq[Event[Outcome]]( + Outcome("Pierre", "AllSites", "hospitalized_fall", 3D, makeTS(2017, 7, 18), None), + Outcome("Paul", "AllSites", "hospitalized_fall", 4D, makeTS(2017, 1, 2), None), + Outcome("Charlotte", "AllSites", "hospitalized_fall", 2D, makeTS(2017, 10, 22), None) ).toDS // When - val result = HospitalizedFractures.transform(diagnoses, medicalActs, List(AllSites)) + val result = HospitalizedFractures.transform(diagnoses, medicalActs, stays, surgeries, List(AllSites)) // Then assertDSs(result, expected) } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/LiberalFracturesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/LiberalFracturesSuite.scala index 960370e0..466a5c83 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/LiberalFracturesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/LiberalFracturesSuite.scala @@ -20,10 +20,10 @@ class LiberalFracturesSuite extends SharedContext { DcirAct("Sam", "3", "4561", makeTS(2015, 7, 18)) ).toDF.as[Event[MedicalAct]] val expected = Seq( - Outcome("Pierre", "Clavicule", "Liberal", makeTS(2017, 7, 
18)), - Outcome("Ben", "MembreSuperieurDistal", "Liberal", makeTS(2017, 7, 18)), - Outcome("Sam", "CraneFace", "Liberal", makeTS(2015, 7, 18)), - Outcome("Sam", "undefined", "Liberal", makeTS(2015, 7, 18)) + Outcome("Pierre", "Clavicule", "Liberal", 1D, makeTS(2017, 7, 18), None), + Outcome("Ben", "MembreSuperieurDistal", "Liberal", 1D, makeTS(2017, 7, 18), None), + Outcome("Sam", "CraneFace", "Liberal", 1D, makeTS(2015, 7, 18), None), + Outcome("Sam", "undefined", "Liberal", 1D, makeTS(2015, 7, 18), None) ).toDF.as[Event[Outcome]] //When diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PrivateAmbulatoryFracturesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PrivateAmbulatoryFracturesSuite.scala index 3cee7b37..5fe5e161 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PrivateAmbulatoryFracturesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PrivateAmbulatoryFracturesSuite.scala @@ -2,8 +2,9 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, Outcome} +import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, Event, Outcome} import fr.polytechnique.cmap.cnam.util.functions._ class PrivateAmbulatoryFracturesSuite extends SharedContext { @@ -53,8 +54,8 @@ class PrivateAmbulatoryFracturesSuite extends SharedContext { DcirAct("loulou", DcirAct.groupID.PublicAmbulatory, "stupidcode", makeTS(2007, 1, 1)) ).toDS - val expected = Seq( - Outcome("riri", "FemurExclusionCol", PrivateAmbulatoryFractures.outcomeName, makeTS(2007, 1, 1)) + val expected: Dataset[Event[Outcome]] = Seq[Event[Outcome]]( + Outcome("riri", "FemurExclusionCol", PrivateAmbulatoryFractures.outcomeName, 1D, makeTS(2007, 1, 1), None) ).toDS // When diff --git 
a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFracturesSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFracturesSuite.scala index 737ef97b..b2f9e8d1 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFracturesSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/fractures/PublicAmbulatoryFracturesSuite.scala @@ -2,8 +2,9 @@ package fr.polytechnique.cmap.cnam.study.fall.fractures +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, McoCeCcamAct, McoCIM10Act, Outcome} +import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, Event, McoCeCcamAct, McoCIM10Act, Outcome} import fr.polytechnique.cmap.cnam.util.functions.makeTS class PublicAmbulatoryFracturesSuite extends SharedContext { @@ -52,8 +53,8 @@ class PublicAmbulatoryFracturesSuite extends SharedContext { DcirAct("john", "ACE", "MZMP007", makeTS(2010, 2, 6)) ).toDS - val expected = Seq( - Outcome("georgette", "MembreSuperieurDistal", PublicAmbulatoryFractures.outcomeName, makeTS(2010, 2, 6)) + val expected: Dataset[Event[Outcome]] = Seq[Event[Outcome]]( + Outcome("georgette", "MembreSuperieurDistal", PublicAmbulatoryFractures.outcomeName, 1D, makeTS(2010, 2, 6), None) ).toDS // When From 0e65a813b31e512bc598ad646381586b4ec58532 Mon Sep 17 00:00:00 2001 From: Dian SUN Date: Tue, 14 Apr 2020 11:23:14 +0200 Subject: [PATCH 35/38] CNAM-456: optimise imports --- .../fr/polytechnique/cmap/cnam/etl/datatypes/Period.scala | 1 - .../cmap/cnam/etl/events/Classification.scala | 1 - .../fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala | 1 - .../scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala | 1 - .../fr/polytechnique/cmap/cnam/etl/events/Exposure.scala | 1 - .../fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala | 1 - .../fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala | 1 - 
.../cmap/cnam/etl/events/MedicalTakeOverReason.scala | 1 - .../fr/polytechnique/cmap/cnam/etl/events/Molecule.scala | 1 - .../fr/polytechnique/cmap/cnam/etl/events/Outcome.scala | 1 - .../cmap/cnam/etl/events/PractionnerClaimSpeciality.scala | 1 - .../fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala | 1 - .../events/diagnoses/SsrDiagnosisExtractor.scala | 2 +- .../cnam/etl/extractors/events/drugs/DrugExtractor.scala | 2 +- .../events/molecules/DcirMoleculePurchases.scala | 2 +- .../extractors/events/ngapacts/DcirNgapActExtractor.scala | 2 +- .../extractors/events/ngapacts/McoCeNgapActExtractor.scala | 4 ++-- .../cmap/cnam/etl/extractors/patients/DcirPatients.scala | 2 +- .../cmap/cnam/etl/extractors/patients/HadPatients.scala | 2 +- .../cmap/cnam/etl/extractors/patients/IrBenPatients.scala | 2 +- .../cmap/cnam/etl/extractors/patients/McoPatients.scala | 2 +- .../cmap/cnam/etl/extractors/patients/McocePatients.scala | 2 +- .../cmap/cnam/etl/extractors/patients/Patients.scala | 2 +- .../cmap/cnam/etl/extractors/sources/had/HadSource.scala | 2 +- .../cmap/cnam/etl/extractors/sources/mco/McoSource.scala | 2 +- .../cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala | 7 ++----- .../cmap/cnam/etl/extractors/sources/ssr/SsrSource.scala | 2 +- .../cmap/cnam/etl/filters/PatientFiltersImplicits.scala | 3 +-- .../fr/polytechnique/cmap/cnam/etl/sources/Sources.scala | 5 ++--- .../cmap/cnam/etl/sources/data/HadFilters.scala | 2 +- .../cmap/cnam/etl/sources/data/HadSource.scala | 4 ++-- .../cmap/cnam/etl/sources/data/SsrFilters.scala | 2 +- .../cmap/cnam/etl/sources/data/SsrSource.scala | 4 +--- .../etl/transformers/follow_up/FollowUpTransformer.scala | 4 ++-- .../observation/ObservationPeriodTransformer.scala | 2 +- .../cmap/cnam/study/fall/extractors/CardiacExtractor.scala | 2 +- .../cmap/cnam/study/fall/extractors/HTAExtractor.scala | 2 +- .../cmap/cnam/study/fall/extractors/IPPExtractor.scala | 2 +- .../cmap/cnam/study/fall/extractors/OpioidsExtractor.scala | 2 +- 
.../cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala | 3 +-- .../cnam/study/rosiglitazone/RosiglitazoneConfig.scala | 2 +- 41 files changed, 34 insertions(+), 54 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/datatypes/Period.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/datatypes/Period.scala index 29272f60..cb19c7f2 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/datatypes/Period.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/datatypes/Period.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.datatypes import java.sql.Timestamp -import fr.polytechnique.cmap.cnam.etl.transformers.interaction._ import fr.polytechnique.cmap.cnam.util.functions._ case class Period(start: Timestamp, end: Timestamp) extends Subtractable[Period] with Addable[Period]{ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Classification.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Classification.scala index 947290a9..859ad0b0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Classification.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Classification.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row trait Classification extends AnyEvent with EventBuilder { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala index 5fefa552..c55a512e 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Diagnosis.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row trait Diagnosis extends AnyEvent with EventBuilder { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala index b26e629b..b60279f3 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Drug.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row object Drug extends Drug diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Exposure.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Exposure.scala index e0b3a757..cbdd9f52 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Exposure.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Exposure.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row object Exposure extends Exposure diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala index 035d355f..27b74a49 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/FollowUp.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row /** Factory for FollowUp instances. 
*/ object FollowUp extends FollowUp diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala index 76a04463..535e0af5 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalAct.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row trait MedicalAct extends AnyEvent with EventBuilder { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReason.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReason.scala index 6a97e447..7fd0b9af 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReason.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/MedicalTakeOverReason.scala @@ -1,7 +1,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row trait MedicalTakeOverReason extends AnyEvent with EventBuilder { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Molecule.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Molecule.scala index 33e98963..feb6116f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Molecule.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Molecule.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row object Molecule extends Molecule diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Outcome.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Outcome.scala index 1478e149..c49fdd44 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Outcome.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Outcome.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import 
java.sql.Timestamp -import org.apache.spark.sql.Row object Outcome extends Outcome diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala index 63ed1342..763549f6 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/PractionnerClaimSpeciality.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row trait PractitionerClaimSpeciality extends AnyEvent with EventBuilder { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala index f324a83e..34964c05 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/Trackloss.scala @@ -3,7 +3,6 @@ package fr.polytechnique.cmap.cnam.etl.events import java.sql.Timestamp -import org.apache.spark.sql.Row object Trackloss extends Trackloss diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala index b773bf47..54e1b371 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/diagnoses/SsrDiagnosisExtractor.scala @@ -2,7 +2,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses -import fr.polytechnique.cmap.cnam.etl.events.{Diagnosis, EventBuilder, SsrAssociatedDiagnosis, SsrLinkedDiagnosis, SsrMainDiagnosis, SsrTakingOverPurpose} +import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy import 
fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr.SsrSimpleExtractor diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugExtractor.scala index eb7a11d6..682fe299 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/drugs/DrugExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.events.drugs import java.sql.Timestamp import org.apache.commons.codec.binary.Base64 -import org.apache.spark.sql.{Column, DataFrame, Row} import org.apache.spark.sql.functions.{col, when} import org.apache.spark.sql.types.{StringType, TimestampType} +import org.apache.spark.sql.{Column, DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor import fr.polytechnique.cmap.cnam.etl.sources.Sources diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchases.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchases.scala index 81ea1ab8..4558a078 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchases.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/molecules/DcirMoleculePurchases.scala @@ -3,10 +3,10 @@ package fr.polytechnique.cmap.cnam.etl.extractors.events.molecules import java.sql.Timestamp -import org.apache.spark.sql.{Column, DataFrame, Row} import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.{col, sum, udf, when} import org.apache.spark.sql.types.{DoubleType, StringType, TimestampType} +import org.apache.spark.sql.{Column, DataFrame, Row} import 
fr.polytechnique.cmap.cnam.etl.events.{Event, Molecule} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor import fr.polytechnique.cmap.cnam.etl.sources.Sources diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActExtractor.scala index 3528fd06..cd5dca44 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/DcirNgapActExtractor.scala @@ -3,8 +3,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts -import org.apache.spark.sql.{Column, DataFrame, Row} import org.apache.spark.sql.functions.col +import org.apache.spark.sql.{Column, DataFrame, Row} import fr.polytechnique.cmap.cnam.etl.events.{DcirNgapAct, Event, EventBuilder, NgapAct} import fr.polytechnique.cmap.cnam.etl.extractors.Extractor import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirRowExtractor diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala index 4c7aa55d..268a12a4 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/ngapacts/McoCeNgapActExtractor.scala @@ -3,9 +3,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.events.ngapacts import scala.util.Try -import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.col -import fr.polytechnique.cmap.cnam.etl.events.{Event, EventBuilder, McoCeFbstcNgapAct, McoCeFcstcNgapAct, NgapAct} +import org.apache.spark.sql.{DataFrame, Row} +import fr.polytechnique.cmap.cnam.etl.events._ import 
fr.polytechnique.cmap.cnam.etl.extractors.Extractor import fr.polytechnique.cmap.cnam.etl.extractors.sources.mcoce.McoCeRowExtractor import fr.polytechnique.cmap.cnam.etl.sources.Sources diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala index 5d125fd7..629ce0fc 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala @@ -2,10 +2,10 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import org.apache.spark.sql.{Column, DataFrame, Dataset} import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ +import org.apache.spark.sql.{Column, DataFrame, Dataset} import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientUtils._ import fr.polytechnique.cmap.cnam.etl.patients.Patient diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala index 61dfcb15..70df2441 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala @@ -1,7 +1,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{Column, DataFrame} import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear private[patients] object HadPatients { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala index 6503955a..b30c183f 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala @@ -2,9 +2,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import org.apache.spark.sql.{Column, DataFrame, Dataset} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.TimestampType +import org.apache.spark.sql.{Column, DataFrame, Dataset} import fr.polytechnique.cmap.cnam.etl.patients.Patient import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala index 2a5dfeea..6b0bdc57 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala @@ -2,8 +2,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{Column, DataFrame} import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear private[patients] object McoPatients { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala index 13bb1f6e..02963d06 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala @@ -2,10 +2,10 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, IntegerType, 
TimestampType} +import org.apache.spark.sql.{DataFrame, Dataset} import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientUtils.estimateBirthDateCol import fr.polytechnique.cmap.cnam.etl.patients.Patient diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala index e80cf7db..3f9f10a0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala @@ -3,8 +3,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import java.sql.Timestamp -import org.apache.spark.sql.{Column, DataFrame, Dataset} import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{Column, DataFrame, Dataset} import fr.polytechnique.cmap.cnam.etl.patients._ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.datetime.implicits._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSource.scala index 4bf302ec..afc5345c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/had/HadSource.scala @@ -1,8 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.sources.had -import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.TimestampType +import org.apache.spark.sql.{Column, DataFrame} import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames trait HadSource extends ColumnNames { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSource.scala index 26e57fa0..9caebc4e 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/mco/McoSource.scala @@ -2,9 +2,9 @@ package fr.polytechnique.cmap.cnam.etl.extractors.sources.mco -import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{LongType, TimestampType} +import org.apache.spark.sql.{Column, DataFrame} import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames import fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala index 8166b459..e99e49aa 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrRowExtractor.scala @@ -1,11 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr import java.sql.Timestamp -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.functions.col -import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, EventBuilder} -import fr.polytechnique.cmap.cnam.etl.extractors.{EventRowExtractor, Extractor} -import fr.polytechnique.cmap.cnam.etl.sources.Sources +import org.apache.spark.sql.Row +import fr.polytechnique.cmap.cnam.etl.extractors.EventRowExtractor /** * Gets the following fields for SSR sourced events: patientID, start, groupId. 
diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSource.scala index 9f91beb4..ab6edbb0 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/ssr/SsrSource.scala @@ -1,8 +1,8 @@ package fr.polytechnique.cmap.cnam.etl.extractors.sources.ssr -import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{LongType, TimestampType} +import org.apache.spark.sql.{Column, DataFrame} import fr.polytechnique.cmap.cnam.etl.extractors.ColumnNames import fr.polytechnique.cmap.cnam.util.ColumnUtilities.parseTimestamp diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/filters/PatientFiltersImplicits.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/filters/PatientFiltersImplicits.scala index 12fdd3ac..72956bfe 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/filters/PatientFiltersImplicits.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/filters/PatientFiltersImplicits.scala @@ -6,10 +6,9 @@ import java.sql.Timestamp import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{BooleanType, TimestampType} -import org.apache.spark.sql.{Column, DataFrame, Dataset} +import org.apache.spark.sql.{Column, Dataset} import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.patients.Patient -import fr.polytechnique.cmap.cnam.util.RichDataFrame._ /* * The architectural decisions regarding the patient filters can be found in the following page: diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala index 6d18dee1..b3ae1114 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/Sources.scala @@ -3,11 +3,10 @@ package fr.polytechnique.cmap.cnam.etl.sources import java.sql.Timestamp - import org.apache.spark.sql.{DataFrame, SQLContext} import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig.InputPaths -import fr.polytechnique.cmap.cnam.etl.sources.data.{DcirSource, HadSource, McoCeSource, McoSource, SsrCeSource, SsrSource} -import fr.polytechnique.cmap.cnam.etl.sources.value.{DosagesSource, IrBenSource, IrImbSource, IrNatSource, IrPhaSource} +import fr.polytechnique.cmap.cnam.etl.sources.data._ +import fr.polytechnique.cmap.cnam.etl.sources.value._ case class Sources( dcir: Option[DataFrame] = None, diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala index c0f24456..cf2b7101 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadFilters.scala @@ -1,7 +1,7 @@ package fr.polytechnique.cmap.cnam.etl.sources.data -import fr.polytechnique.cmap.cnam.etl.sources.data.DoublonFinessPmsi.specialHospitalCodes import org.apache.spark.sql.{Column, DataFrame} +import fr.polytechnique.cmap.cnam.etl.sources.data.DoublonFinessPmsi.specialHospitalCodes private[data] class HadFilters(rawHad: DataFrame) { /** Filter out Had corrupted stays as returned by the ATIH. 
diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSource.scala index 85b90dde..47f78cec 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/HadSource.scala @@ -1,7 +1,7 @@ package fr.polytechnique.cmap.cnam.etl.sources.data -import org.apache.spark.sql.functions.{col, to_date, year} -import org.apache.spark.sql.{Column, DataFrame, SQLContext} +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.{Column, DataFrame} /** * Extractor class for the SSR table diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFilters.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFilters.scala index c0d9a54c..30cbb0f9 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFilters.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrFilters.scala @@ -1,7 +1,7 @@ package fr.polytechnique.cmap.cnam.etl.sources.data -import fr.polytechnique.cmap.cnam.etl.sources.data.DoublonFinessPmsi.specialHospitalCodes import org.apache.spark.sql.{Column, DataFrame} +import fr.polytechnique.cmap.cnam.etl.sources.data.DoublonFinessPmsi.specialHospitalCodes private[data] class SsrFilters(rawSsr: DataFrame) { /** Filter out Ssr corrupted stays as returned by the ATIH. 
diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSource.scala index 500cc119..eeb2eb89 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/sources/data/SsrSource.scala @@ -1,9 +1,7 @@ package fr.polytechnique.cmap.cnam.etl.sources.data import org.apache.spark.sql.functions.col -import org.apache.spark.sql.{Column, DataFrame, SQLContext} -import org.apache.spark.sql.functions.to_date -import org.apache.spark.sql.functions.year +import org.apache.spark.sql.{Column, DataFrame} /** * Extractor class for the SSR table diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala index 9a64398e..0957c8cf 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/follow_up/FollowUpTransformer.scala @@ -4,8 +4,8 @@ package fr.polytechnique.cmap.cnam.etl.transformers.follow_up import java.sql.Timestamp import scala.util.Try -import org.apache.spark.sql.functions._ import org.apache.spark.sql.Dataset +import org.apache.spark.sql.functions._ import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.patients.Patient @@ -35,8 +35,8 @@ class FollowUpTransformer(config: FollowUpTransformerConfig) { tracklosses: Dataset[Event[Trackloss]]): Dataset[Event[FollowUp]] = { import patients.sparkSession.implicits._ - import FollowUpTransformerUtilities._ import Columns._ + import FollowUpTransformerUtilities._ val delayMonths = config.delayMonths diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/ObservationPeriodTransformer.scala 
b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/ObservationPeriodTransformer.scala index 7679bfed..0c389608 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/ObservationPeriodTransformer.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/observation/ObservationPeriodTransformer.scala @@ -3,8 +3,8 @@ package fr.polytechnique.cmap.cnam.etl.transformers.observation import java.sql.Timestamp -import org.apache.spark.sql.functions._ import org.apache.spark.sql.Dataset +import org.apache.spark.sql.functions._ import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, Event, Molecule, ObservationPeriod} import fr.polytechnique.cmap.cnam.util.datetime.implicits._ diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala index 012224eb..f67581bf 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/CardiacExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.Cardiac import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources object CardiacExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala index 44efea4e..2b954cd7 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/HTAExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.Antihypertenseurs import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources object HTAExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala index 2a7f438c..e346ab07 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/IPPExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.ProtonPumpInhibitors import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources object IPPExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala index 
70a0decf..ee7286e3 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/OpioidsExtractor.scala @@ -4,9 +4,9 @@ package fr.polytechnique.cmap.cnam.study.fall.extractors import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event} -import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.classification.families.Opioids import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.level.TherapeuticLevel +import fr.polytechnique.cmap.cnam.etl.extractors.events.drugs.{DrugConfig, DrugExtractor} import fr.polytechnique.cmap.cnam.etl.sources.Sources object OpioidsExtractor { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala index 474aa684..3e801134 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneConfig.scala @@ -3,9 +3,8 @@ package fr.polytechnique.cmap.cnam.study.pioglitazone import java.time.LocalDate -import pureconfig.generic.auto._ -import me.danielpes.spark.datetime.Period import me.danielpes.spark.datetime.implicits._ +import pureconfig.generic.auto._ import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig import fr.polytechnique.cmap.cnam.etl.config.{BaseConfig, ConfigLoader} import fr.polytechnique.cmap.cnam.etl.extractors.events.acts.MedicalActsConfig diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneConfig.scala index 77e5e45f..12dac1df 100644 --- 
a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneConfig.scala @@ -3,8 +3,8 @@ package fr.polytechnique.cmap.cnam.study.rosiglitazone import java.time.LocalDate -import pureconfig.generic.auto._ import me.danielpes.spark.datetime.implicits._ +import pureconfig.generic.auto._ import fr.polytechnique.cmap.cnam.etl.config.study.StudyConfig import fr.polytechnique.cmap.cnam.etl.config.{BaseConfig, ConfigLoader} import fr.polytechnique.cmap.cnam.etl.extractors.events.diagnoses.DiagnosesConfig From 99e20f4fcb81cb1631aa89712a6d2ac0617acd76 Mon Sep 17 00:00:00 2001 From: Dian SUN Date: Fri, 17 Apr 2020 12:47:13 +0200 Subject: [PATCH 36/38] CNAM-456: avoid scala match may not be exhaustive --- .../cnam/etl/transformers/exposures/ExposureDuration.scala | 2 ++ .../cmap/cnam/etl/transformers/interaction/ExposureN.scala | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureDuration.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureDuration.scala index 3f822766..b06e594f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureDuration.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureDuration.scala @@ -48,6 +48,8 @@ case class ExposureDuration(patientID: String, value: String, period: Period, sp RightRemainingPeriod(ExposureDuration(self.patientID, self.value, p2, self.span)) ) } + // catch-all branch so the compiler does not warn that the match may not be exhaustive + case _ => NullRemainingPeriod } } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/ExposureN.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/ExposureN.scala index 7ea1c343..3a2aa524 100644 ---
a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/ExposureN.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/interaction/ExposureN.scala @@ -3,8 +3,7 @@ package fr.polytechnique.cmap.cnam.etl.transformers.interaction import cats.syntax.functor._ -import me.danielpes.spark.datetime.{Period => Duration} -import fr.polytechnique.cmap.cnam.etl.datatypes.{NullRemainingPeriod, Period, Subtractable, RemainingPeriod} +import fr.polytechnique.cmap.cnam.etl.datatypes.{NullRemainingPeriod, Period, RemainingPeriod, Subtractable} import fr.polytechnique.cmap.cnam.etl.events.{Event, Interaction} case class ExposureN(patientID: String, values: Set[String], period: Period) extends Subtractable[ExposureN] { @@ -12,6 +11,7 @@ case class ExposureN(patientID: String, values: Set[String], period: Period) ext /** * Returns duration of this ExposureN in milliseconds + * * @return duration in millisecond as Long */ def toDuration: Long = self.period.end.getTime - self.period.start.getTime From 6c4ea8d514fc3c8159726a6e96bfba4d7bade651 Mon Sep 17 00:00:00 2001 From: Youcef Sebiat Date: Fri, 17 Apr 2020 12:14:50 +0200 Subject: [PATCH 37/38] Hotfix: Correct bug on end date. Hotfix: Add tests. Hotfix: Add tests. 
--- .../FallHospitalStayExtractor.scala | 10 ++++- .../FallHospitalStayExtractorSuite.scala | 43 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractorSuite.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractor.scala index fd54690f..e8a26b7c 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractor.scala @@ -28,7 +28,15 @@ class FallHospitalStayExtractor(codes: SimpleExtractorCodes) extends McoSimpleEx override def neededColumns: List[String] = List(ColNames.EndDate, ColNames.ExitMode) ++ super.usedColumns - override def extractEnd(r: Row): Option[Timestamp] = Some(new Timestamp(r.getAs[Date](ColNames.EndDate).getTime)) + override def extractEnd(r: Row): Option[Timestamp] = Some { + if (!r.isNullAt(r.fieldIndex(ColNames.EndDate))) { + new Timestamp(r.getAs[Date](ColNames.EndDate).getTime) + } + else { // This shouldn't happen, but some hospital stays come without an EndDate + extractStart(r) + } + + } override def extractValue(row: Row): String = exitCodes(row.getAs[String](columnName)).value } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractorSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractorSuite.scala new file mode 100644 index 00000000..89e769ab --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/extractors/FallHospitalStayExtractorSuite.scala @@ -0,0 +1,43 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.study.fall.extractors + +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 
+import org.apache.spark.sql.types.{DateType, StructField, StructType} +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.mco.McoSource +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class FallHospitalStayExtractorSuite extends SharedContext { + val colNames = new McoSource {}.ColNames + val newColNames = new McoSource {}.NewColumns + + "extractEnd" should "return the end date from end date column" in { + // Given + val schema = StructType( + StructField(colNames.EndDate, DateType) :: + StructField(newColNames.EstimatedStayStart, DateType) :: Nil + ) + val array = Array[Any](makeTS(2020, 1, 3), makeTS(2020, 1, 1)) + val input = new GenericRowWithSchema(array, schema) + val expected = makeTS(2020, 1, 3) + //When + val result = new FallHospitalStayExtractor(SimpleExtractorCodes.empty).extractEnd(input) + assert(result.get == expected) + } + + it should "fall back on the start date when the end date column is null" in { + // Given + val schema = StructType( + StructField(colNames.EndDate, DateType) :: + StructField(newColNames.EstimatedStayStart, DateType) :: Nil + ) + val array = Array[Any](null, makeTS(2020, 1, 1)) + val input = new GenericRowWithSchema(array, schema) + val expected = makeTS(2020, 1, 1) + //When + val result = new FallHospitalStayExtractor(SimpleExtractorCodes.empty).extractEnd(input) + assert(result.get == expected, true) + } +} From 0895435cdad11d0152a4714f0796c3ede5a9ca2c Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 17 Jul 2020 10:36:36 +0200 Subject: [PATCH 38/38] CNAM-286: Refactoring Patients New abstraction of Patients Extractors Uniform all extractors of patients from different sources Separation of extractors and filters --- .../patients/AllPatientExtractor.scala | 89 +++++++ .../extractors/patients/DcirPatients.scala | 214 ++++++++++----- .../etl/extractors/patients/HadPatients.scala | 115 
+++++---- .../extractors/patients/IrBenPatients.scala | 138 +++++----- .../etl/extractors/patients/McoPatients.scala | 118 +++++---- .../extractors/patients/McocePatients.scala | 218 ++++++++++------ .../patients/PatientExtractor.scala | 80 ++++++ .../etl/extractors/patients/Patients.scala | 109 -------- .../extractors/patients/PatientsConfig.scala | 3 +- .../patients/PatientFilters.scala | 76 ++++++ .../bulk/extractors/PatientExtractor.scala | 5 +- .../cmap/cnam/study/fall/FallMain.scala | 30 ++- .../cnam/study/fall/FallMainExtract.scala | 33 ++- .../cnam/study/fall/FallMainTransform.scala | 2 +- .../study/pioglitazone/PioglitazoneMain.scala | 18 +- .../rosiglitazone/RosiglitazoneMain.scala | 20 +- src/test/resources/test-input/HAD.parquet | Bin 22964 -> 59557 bytes ...e.scala => AllPatientExtractorSuite.scala} | 87 +++++-- .../patients/DcirPatientsSuite.scala | 202 ++++++++------- .../patients/HadPatientsSuite.scala | 132 ++++++---- .../patients/IrBenPatientsSuite.scala | 167 ++++++------ .../patients/McoPatientsSuite.scala | 140 +++++++--- .../patients/McocePatientsSuite.scala | 243 +++++++++++------- .../patients/PatientFiltersSuite.scala | 42 +++ .../FallMainExtractorTransformSuite.scala | 9 +- .../cmap/cnam/study/fall/FallMainSuite.scala | 6 +- 26 files changed, 1449 insertions(+), 847 deletions(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/AllPatientExtractor.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientExtractor.scala delete mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/patients/PatientFilters.scala rename src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/{PatientsSuite.scala => AllPatientExtractorSuite.scala} (53%) create mode 100644 
src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/patients/PatientFiltersSuite.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/AllPatientExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/AllPatientExtractor.scala new file mode 100644 index 00000000..3c39056d --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/AllPatientExtractor.scala @@ -0,0 +1,89 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.patients + +import org.apache.spark.sql.functions.{coalesce, col, when, year} +import org.apache.spark.sql.{Column, DataFrame, Dataset} +import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +object AllPatientExtractor { + + def extract(sources: Sources): Dataset[Patient] = { + + val irBenPatients: Dataset[Patient] = IrBenPatients.extract(sources).as("irBen") + val dcirPatients: Dataset[Patient] = DcirPatients.extract(sources).as("dcir") + val mcoPatients: Dataset[Patient] = McoPatients.extract(sources).as("mco") + + val joinColumn: Column = coalesce(col("irBen.patientID"), col("mco.patientID")) + + val patients: DataFrame = irBenPatients + .join(mcoPatients, col("irBen.patientID") === col("mco.patientID"), "outer") + .join(dcirPatients, joinColumn === col("dcir.patientID"), "outer") + + val patientID: Column = coalesce( + col("dcir.patientID"), + col("irBen.patientID"), + col("mco.patientID") + ) + + val gender: Column = coalesce( + col("irBen.gender"), + col("dcir.gender") + ) + + val birthDate: Column = coalesce( + col("irBen.birthDate"), + col("dcir.birthDate") + ) + + val deathDate: Column = coalesce( + when( + validateDeathDate(col("irBen.deathDate"), birthDate), + col("irBen.deathDate") + ), + when( + validateDeathDate(col("dcir.deathDate"), birthDate), + col("dcir.deathDate") + ), + when( + validateDeathDate(col("mco.deathDate"), birthDate), + col("mco.deathDate") + )) + + import 
patients.sparkSession.implicits._ + + val birthYearErrors = List(-1, 0, 1, 1600) + + val filteredPatients = patients.where(birthDate.isNotNull && !year(birthDate).isin(birthYearErrors: _*)).select( + patientID.as("patientID"), + gender.as("gender"), + birthDate.as("birthDate"), + deathDate.as("deathDate") + ).as[Patient] + + sources.mcoCe match { + case None => filteredPatients.as[Patient] + case Some(_) => + val mcocePatients: Dataset[Patient] = McocePatients.extract(sources).as("mco_ce") + + val allPatients = filteredPatients.as("patients") + .join(mcocePatients, col("patients.patientID") === col("mco_ce.patientID"), "full") + + val idCol = coalesce(col("patients.patientID"), col("mco_ce.patientID")) + .alias("patientID") + val genderCol = coalesce(col("patients.gender"), col("mco_ce.gender")) + .alias("gender") + val birthDateCol = coalesce(col("patients.birthDate"), col("mco_ce.birthDate")) + .alias("birthDate") + val deathDateCol = coalesce(col("patients.deathDate"), col("mco_ce.deathDate")) + .alias("deathDate") + + allPatients + .select(idCol, genderCol, birthDateCol, deathDateCol) + .filter(col("patientID").isNotNull && col("gender").isNotNull && col("birthDate").isNotNull) + .as[Patient] + } + } + + def validateDeathDate(deathDate: Column, birthDate: Column): Column = + deathDate >= birthDate +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala index 629ce0fc..b6502eac 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatients.scala @@ -2,104 +2,176 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients +import java.sql.Timestamp import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import org.apache.spark.sql.{Column, 
DataFrame, Dataset} +import org.apache.spark.sql.{Column, Dataset} import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientUtils._ -import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.etl.sources.Sources -private[patients] object DcirPatients { +case class PatientDcir(patientID: String, gender: Int, age: Int, birthYear: String, birthDate: Timestamp, eventDate: Timestamp, deathDate: Option[Timestamp]) + extends DerivedPatient - implicit class DcirPatientsDataFrame(data: DataFrame) { +private[patients] object DcirPatients extends PatientExtractor[PatientDcir] { - // The birth year for each patient is found by grouping by patientId and birthYear and then - // by taking the most frequent birth year for each patient. - def findBirthYears: DataFrame = { - val window = Window.partitionBy(col("patientID")).orderBy(col("count").desc, col("birthYear")) - data - .groupBy(col("patientID"), col("birthYear")).agg(count("*").as("count")) - // "first" is only deterministic when applied over an ordered window: - .select(col("patientID"), first(col("birthYear")).over(window).as("birthYear")) - .distinct - } + /** Find birth date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with birth date. + */ + override def findPatientBirthDate(patients: Dataset[PatientDcir]): Dataset[PatientDcir] = { + + val window = Window.partitionBy(col("patientID")).orderBy(col("count").desc, col("birthYear")) + val birthYear = patients + .groupBy(col("patientID"), col("birthYear")).agg(count("*").as("count")) + // "first" is only deterministic when applied over an ordered window: + .select(col("patientID"), first(col("birthYear")).over(window).as("birthYear")) + .distinct // After selecting the data, the next step is to group by patientId and age, because we need to // estimate the birthDate ant we use min(eventDate) and max(eventDate) for each age to achieve // that. 
- def groupByIdAndAge: DataFrame = { - data - .groupBy(col("patientID"), col("age")) - .agg( - count("gender").as("genderCount"), // We will use it to find the appropriate gender (avg) - sum("gender").as("genderSum"), // We will use it to find the appropriate gender (avg) - min("eventDate").as("minEventDate"), // the min event date for each age of a patient - max("eventDate").as("maxEventDate"), // the max event date for each age of a patient - min("deathDate").as("deathDate") // the earliest death date - ) - } + val minmaxevent = patients + .groupBy(col("patientID"), col("age")) + .agg( + min("eventDate").as("minEventDate"), // the min event date for each age of a patient + max("eventDate").as("maxEventDate") // the max event date for each age of a patient + ) // Then we aggregate again by taking the mean between the closest dates where the age changed. // For example, if the patient was 60yo when an event happened on Apr/2010 and he was 61yo when // another event happened on Jun/2010, we calculate the mean and estimate his birthday as - // being in May of the year found in "findBirthYears" - def estimateFields: DataFrame = { - val birthDateAggCol: Column = estimateBirthDateCol( - max(col("minEventDate")).cast(TimestampType), - min(col("maxEventDate")).cast(TimestampType), - first(col("birthYear")) - ) + // being in May of the year found + val birthDateAggCol: Column = estimateBirthDateCol( + max(col("minEventDate")).cast(TimestampType), + min(col("maxEventDate")).cast(TimestampType), + first(col("birthYear")) + ) + + val birthDate = minmaxevent.join(birthYear, "patientID") + .groupBy(col("patientID")) + .agg( + birthDateAggCol.as("birthDate")) + + import patients.sparkSession.implicits._ - data - .groupBy(col("patientID")) - .agg( - // Here we calculate the average of gender values and then we round. So, if 1 is more - // common, the average will be less than 1.5 and the final value will be 1. The same is - // valid for the case where 2 is more common. 
This is the reason why we set invalid - // values for gender to null. - round(sum(col("genderSum")) / sum(col("genderCount"))).cast(IntegerType).as("gender"), - birthDateAggCol.as("birthDate"), - min(col("deathDate")).cast(TimestampType).as("deathDate") - ) - } + val result = patients.as("patients") + .joinWith(birthYear.as("birthYearDf"), col("patients.patientID").equalTo(col("birthYearDf.patientID")), "left") + + result + .joinWith(birthDate, result("_1.patientID") === birthDate("patientID"), "left") + .map(p => + PatientDcir( + p._1._1.patientID, + p._1._1.gender, + p._1._1.age, + p._1._2.getAs("birthYear"), + p._2.getAs("birthDate"), + p._1._1.eventDate, + p._1._1.deathDate + )) } - def extract( - dcir: DataFrame, - minGender: Int, - maxGender: Int, - minYear: Int, - maxYear: Int): Dataset[Patient] = { + /** Find gender of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with gender. + */ + override def findPatientGender(patients: Dataset[PatientDcir]): Dataset[PatientDcir] = { + + import patients.sparkSession.implicits._ + + val genderCodeError = 9 + + val gendercount = patients + .filter(_.gender != genderCodeError) + .groupByKey(p => (p.patientID, p.age)) + .count() + .map(p => (p._1._1, p._2.toInt)) + + val gendersum = patients + .filter(_.gender != genderCodeError) + .map(p => ((p.patientID, p.age), p.gender)) + .groupByKey(_._1) + .mapValues(row => row._2) + .reduceGroups((acc, str) => acc + str) + .map(p => (p._1, p._2)) - val genderCol: Column = when( - col("BEN_SEX_COD").between(minGender, maxGender), - col("BEN_SEX_COD") - ).cast(IntegerType) + val sumgendercount = gendercount + .groupByKey(p => p._1) + .mapValues(row => row._2) + .reduceGroups((acc, str) => acc + str) + .map(p => (p._1, p._2)) - val deathDateCol: Column = when( - year(col("BEN_DCD_DTE")).between(minYear, maxYear), - col("BEN_DCD_DTE") - ).cast(DateType) + val sumgendersum = gendersum + .groupByKey(p => p._1._1) + .mapValues(row => 
row._2) + .reduceGroups((acc, str) => acc + str) + .map(p => (p._1, p._2)) + val result = patients.joinWith(sumgendersum, patients("patientID") === sumgendersum("_1"), "left") + + result.joinWith(sumgendercount, result("_1.patientID") === sumgendercount("_1"), "left") + .map(p => + PatientDcir( + p._1._1.patientID, + if(p._2 != null) Math.round(p._1._2._2.toFloat / p._2._2) else 0, + p._1._1.age, + p._1._1.birthYear, + p._1._1.birthDate, + p._1._1.eventDate, + p._1._1.deathDate + )) + } + + /** Find death date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with death date. + */ + override def findPatientDeathDate(patients: Dataset[PatientDcir]): Dataset[PatientDcir] = { + import patients.sparkSession.implicits._ + val mindeathdate = patients + .groupByKey(p => p.patientID) + .reduceGroups((p1, p2) => if ((p2.deathDate.isEmpty && p1.deathDate.isEmpty) || p2.deathDate.isEmpty || (p1.deathDate.isDefined && p1.deathDate.get.before(p2.deathDate.get))) p1 else p2) + .map(p => (p._2.patientID, p._2.deathDate)) + + val genderCodeError = 9 + + patients + .filter(_.gender != genderCodeError) + .joinWith(mindeathdate, patients("patientID") === mindeathdate("_1"), "left") + .map(p => + PatientDcir( + p._1.patientID, + p._1.gender, + p._1.age, + p._1.birthYear, + p._1.birthDate, + p._1.eventDate, + p._2._2)) + } + + /** Gets and prepares all the needed columns from the Sources. + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A [[Dataset]] with needed columns. 
+ */ + override def getInput(sources: Sources): Dataset[PatientDcir] = { val inputColumns: List[Column] = List( col("NUM_ENQ").cast(StringType).as("patientID"), - genderCol.as("gender"), + col("BEN_SEX_COD").cast(IntegerType).as("gender"), col("BEN_AMA_COD").cast(IntegerType).as("age"), col("BEN_NAI_ANN").cast(StringType).as("birthYear"), - col("EXE_SOI_DTD").cast(DateType).as("eventDate"), - deathDateCol.as("deathDate") + lit(null).cast(TimestampType).as("birthDate"), + col("EXE_SOI_DTD").cast(TimestampType).as("eventDate"), + col("BEN_DCD_DTE").cast(TimestampType).as("deathDate") ) - val persistedDcir = dcir.select(inputColumns: _*) - - val birthYears: DataFrame = persistedDcir.findBirthYears - + val dcir = sources.dcir.get import dcir.sqlContext.implicits._ - val result = persistedDcir - .groupByIdAndAge - .join(birthYears, "patientID") - .estimateFields - .as[Patient] - result + dcir.select(inputColumns: _*).as[PatientDcir] } + } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala index 70df2441..a25dade9 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatients.scala @@ -1,58 +1,77 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients +import java.sql.Timestamp import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.types.{IntegerType, StringType, TimestampType} +import org.apache.spark.sql.{Column, Dataset} +import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear -private[patients] object HadPatients { - - val inputColumns: List[Column] = List( - col("NUM_ENQ").as("patientID"), - col("HAD_B__SOR_MOD").as("SOR_MOD"), - col("HAD_B__SOR_MOI").as("SOR_MOI"), - 
col("HAD_B__SOR_ANN").as("SOR_ANN") - ) - - val outputColumns: List[Column] = List( - col("patientID"), - col("deathDate") - ) - - implicit class HadPatientsDataFrame(data: DataFrame) { - - def getDeathDates(deathCode: Int): DataFrame = { - val deathDates: DataFrame = data.filter(col("SOR_MOD") === deathCode) - .withColumn("deathDate", computeDateUsingMonthYear(col("SOR_MOI"), col("SOR_ANN"))) - - val result = deathDates - .groupBy("patientID") - .agg( - countDistinct(col("deathDate")).as("count"), - min(col("deathDate")).as("deathDate") - ).cache() - /* - val conflicts = result - .filter(col("count") > 1) - .select(col("patientID")) - .distinct - .collect - - if(conflicts.length != 0) - Logger.getLogger(getClass).warn("The patients in " + - conflicts.deep.mkString("\n") + - "\nhave conflicting DEATH DATES in HAD." + - "\nTaking Minimum Death Dates") - */ - result - } +case class PatientHad(patientID: String, exitMode: Int, exitMonth: String, exitYear: String, gender: Int, birthDate: Timestamp, deathDate: Option[Timestamp]) + extends DerivedPatient + +private[patients] object HadPatients extends PatientExtractor[PatientHad] { + + /** Find birth date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with birth date. + */ + override def findPatientBirthDate(patients: Dataset[PatientHad]): Dataset[PatientHad] = { + patients + } + + /** Find gender of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with gender. + */ + override def findPatientGender(patients: Dataset[PatientHad]): Dataset[PatientHad] = { + patients + } + + /** Find death date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with death date. 
+ */ + override def findPatientDeathDate(patients: Dataset[PatientHad]): Dataset[PatientHad] = { + import patients.sparkSession.implicits._ + val deathCode = 9 + patients + .filter(_.exitMode == deathCode) + .groupByKey(p => p.patientID) + .reduceGroups((p1, p2) => if ((p2.deathDate.isEmpty && p1.deathDate.isEmpty) || p2.deathDate.isEmpty || (p1.deathDate.isDefined && p1.deathDate.get.before(p2.deathDate.get))) p1 else p2) + .map(p => + PatientHad( + p._2.patientID, + p._2.exitMode, + p._2.exitMonth, + p._2.exitYear, + p._2.gender, + p._2.birthDate, + p._2.deathDate + )) } - def extract(had: DataFrame, hadDeathCode: Int = 9): DataFrame = { + /** Gets and prepares all the needed columns from the Sources. + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A [[Dataset]] with needed columns. + */ + override def getInput(sources: Sources): Dataset[PatientHad] = { + val inputColumns: List[Column] = List( + col("NUM_ENQ").cast(StringType).as("patientID"), + col("HAD_B__SOR_MOD").cast(IntegerType).as("exitMode"), + col("HAD_B__SOR_MOI").cast(StringType).as("exitMonth"), + col("HAD_B__SOR_ANN").cast(StringType).as("exitYear"), + lit(0).cast(IntegerType).as("gender"), + lit(null).cast(TimestampType).as("birthDate"), + computeDateUsingMonthYear(col("HAD_B__SOR_MOI"), col("HAD_B__SOR_ANN")).cast(TimestampType).as("deathDate") + ) - had - .select(inputColumns: _*) - .distinct - .getDeathDates(hadDeathCode) - .select(outputColumns: _*) + val had = sources.had.get + import had.sqlContext.implicits._ + had.select(inputColumns: _*).as[PatientHad] } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala index b30c183f..c9ee1907 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatients.scala 
@@ -2,89 +2,77 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients +import java.sql.Timestamp import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.TimestampType -import org.apache.spark.sql.{Column, DataFrame, Dataset} -import fr.polytechnique.cmap.cnam.etl.patients.Patient +import org.apache.spark.sql.types.{IntegerType, StringType, TimestampType} +import org.apache.spark.sql.{Column, Dataset} +import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear -private[patients] object IrBenPatients { +case class PatientIrBen(patientID: String, gender: Int, birthMonth: String, birthYear: String, birthDate: Timestamp, deathDate: Option[Timestamp]) + extends DerivedPatient - val inputColumns = List( - col("NUM_ENQ").as("patientID"), - col("BEN_SEX_COD"), - col("BEN_NAI_MOI"), - col("BEN_NAI_ANN"), - col("BEN_DCD_DTE") - ) +private[patients] object IrBenPatients extends PatientExtractor[PatientIrBen] { - val outputColumns = List( - col("patientID"), - col("gender"), - col("birthDate"), - col("deathDate") - ) - - implicit class IrBenPatientsDataFrame(data: DataFrame) { - - def getGender: DataFrame = { - val result = data - .select( - col("patientID"), - col("BEN_SEX_COD").cast("int").as("gender") - ).distinct - .cache - - val patients = result.select(col("patientID")).distinct() - - if (result.count != patients.count) { - throw new Exception("One or more patients have conflicting SEX CODE in IR_BEN_R") - } - - result - } - - def getDeathDate: DataFrame = { - data.filter(col("BEN_DCD_DTE").isNotNull) - .groupBy(col("patientID")) - .agg(min(col("BEN_DCD_DTE")).cast(TimestampType).as("deathDate")) - } - - def getBirthDate(minYear: Int = 1900, maxYear: Int = 2100): DataFrame = { - - val birthDate: Column = computeDateUsingMonthYear(col("BEN_NAI_MOI"), col("BEN_NAI_ANN")).as("birthDate") - - val result = data - .filter( - col("BEN_NAI_MOI").between(1, 12) && - 
col("BEN_NAI_ANN").between(minYear, maxYear) - ) - .select(col("patientID"), birthDate) - .distinct - .cache - val patients = result.select(col("patientID")).distinct - - // This check makes sure patients don't have conflicting birth dates. - if (result.count != patients.count) { - throw new Exception("One or more patients have conflicting BIRTH DATES in IR_BEN_R") - } - - result - } + /** Find birth date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with birth date. + */ + override def findPatientBirthDate(patients: Dataset[PatientIrBen]): Dataset[PatientIrBen] = { + patients } - def extract(irBen: DataFrame, minYear: Int, maxYear: Int): Dataset[Patient] = { - - val persistedIrBen = irBen.select(inputColumns: _*).persist() - import persistedIrBen.sqlContext.implicits._ + /** Find gender of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with gender. + */ + override def findPatientGender(patients: Dataset[PatientIrBen]): Dataset[PatientIrBen] = { + patients + } - val birthDates = persistedIrBen.getBirthDate(minYear, maxYear) - val deathDates = persistedIrBen.getDeathDate + /** Find death date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with death date. 
+ */ + override def findPatientDeathDate(patients: Dataset[PatientIrBen]): Dataset[PatientIrBen] = { + import patients.sparkSession.implicits._ + val mindeathdate = patients + .groupByKey(p => p.patientID) + .reduceGroups((p1, p2) => if ((p2.deathDate.isEmpty && p1.deathDate.isEmpty) || p2.deathDate.isEmpty || (p1.deathDate.isDefined && p1.deathDate.get.before(p2.deathDate.get))) p1 else p2) + .map(p => (p._2.patientID, p._2.deathDate)) + + patients.joinWith(mindeathdate, patients("patientID") === mindeathdate("_1"), "left") + .map(p => + PatientIrBen( + p._1.patientID, + p._1.gender, + p._1.birthMonth, + p._1.birthYear, + p._1.birthDate, + p._2._2 + )) + } - persistedIrBen.getGender - .join(deathDates, Seq("patientID"), "left_outer") - .join(birthDates, Seq("patientID"), "left_outer") - .select(outputColumns: _*) - .as[Patient] + /** Gets and prepares all the needed columns from the Sources. + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A [[Dataset]] with needed columns. 
+ */ + override def getInput(sources: Sources): Dataset[PatientIrBen] = { + val inputColumns: List[Column] = List( + col("NUM_ENQ").cast(StringType).as("patientID"), + col("BEN_SEX_COD").cast(IntegerType).as("gender"), + col("BEN_NAI_MOI").cast(StringType).as("birthMonth"), + col("BEN_NAI_ANN").cast(StringType).as("birthYear"), + computeDateUsingMonthYear(col("BEN_NAI_MOI"), col("BEN_NAI_ANN")).as("birthDate"), + col("BEN_DCD_DTE").cast(TimestampType).as("deathDate") + ) + + val irBen = sources.irBen.get + import irBen.sqlContext.implicits._ + irBen.select(inputColumns: _*).as[PatientIrBen] } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala index 6b0bdc57..d8d5c3da 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatients.scala @@ -2,60 +2,78 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Column, DataFrame} +import java.sql.Timestamp +import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.types.{IntegerType, StringType, TimestampType} +import org.apache.spark.sql.{Column, Dataset} +import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions.computeDateUsingMonthYear -private[patients] object McoPatients { - - val inputColumns: List[Column] = List( - col("NUM_ENQ").as("patientID"), - col("MCO_B__SOR_MOD").as("SOR_MOD"), - col("SOR_MOI"), - col("SOR_ANN") - ) - - val outputColumns: List[Column] = List( - col("patientID"), - col("deathDate") - ) - - implicit class McoPatientsDataFrame(data: DataFrame) { - - def getDeathDates(deathCode: Int): DataFrame = { - // TODO: We may need to check the consistency of {SOR_MOI, SOR_ANN} against SOR_DAT in MCO_C. 
- val deathDates: DataFrame = data.filter(col("SOR_MOD") === deathCode) - .withColumn("deathDate", computeDateUsingMonthYear(col("SOR_MOI"), col("SOR_ANN"))) - - val result = deathDates - .groupBy("patientID") - .agg( - countDistinct(col("deathDate")).as("count"), - min(col("deathDate")).as("deathDate") - ).cache() - /* - val conflicts = result - .filter(col("count") > 1) - .select(col("patientID")) - .distinct - .collect - - if(conflicts.length != 0) - Logger.getLogger(getClass).warn("The patients in " + - conflicts.deep.mkString("\n") + - "\nhave conflicting DEATH DATES in MCO." + - "\nTaking Minimum Death Dates") - */ - result - } +case class PatientMco(patientID: String, exitMode: Int, exitMonth: String, exitYear: String, gender: Int, birthDate: Timestamp, deathDate: Option[Timestamp]) + extends DerivedPatient + +private[patients] object McoPatients extends PatientExtractor[PatientMco] { + + /** Find birth date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with birth date. + */ + override def findPatientBirthDate(patients: Dataset[PatientMco]): Dataset[PatientMco] = { + patients + } + + /** Find gender of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with gender. + */ + override def findPatientGender(patients: Dataset[PatientMco]): Dataset[PatientMco] = { + patients + } + + /** Find death date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with death date. 
+ */ + override def findPatientDeathDate(patients: Dataset[PatientMco]): Dataset[PatientMco] = { + import patients.sparkSession.implicits._ + val deathCode = 9 + patients + .filter(_.exitMode == deathCode) + .groupByKey(p => p.patientID) + .reduceGroups((p1, p2) => if ((p2.deathDate.isEmpty && p1.deathDate.isEmpty) || p2.deathDate.isEmpty || (p1.deathDate.isDefined && p1.deathDate.get.before(p2.deathDate.get))) p1 else p2) + .map(p => + PatientMco( + p._2.patientID, + p._2.exitMode, + p._2.exitMonth, + p._2.exitYear, + p._2.gender, + p._2.birthDate, + p._2.deathDate + )) } - def extract(mco: DataFrame, mcoDeathCode: Int = 9): DataFrame = { + /** Gets and prepares all the needed columns from the Sources. + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A [[Dataset]] with needed columns. + */ + override def getInput(sources: Sources): Dataset[PatientMco] = { + val inputColumns: List[Column] = List( + col("NUM_ENQ").cast(StringType).as("patientID"), + col("MCO_B__SOR_MOD").cast(IntegerType).as("exitMode"), + col("SOR_MOI").cast(StringType).as("exitMonth"), + col("SOR_ANN").cast(StringType).as("exitYear"), + lit(0).cast(IntegerType).as("gender"), + lit(null).cast(TimestampType).as("birthDate"), + computeDateUsingMonthYear(col("SOR_MOI"), col("SOR_ANN")).cast(TimestampType).as("deathDate") + ) - mco - .select(inputColumns: _*) - .distinct - .getDeathDates(mcoDeathCode) - .select(outputColumns: _*) + val mco = sources.mco.get + import mco.sqlContext.implicits._ + mco.select(inputColumns: _*).as[PatientMco] } } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala index 02963d06..03b1d24f 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatients.scala @@ -2,99 +2,147 @@ package 
fr.polytechnique.cmap.cnam.etl.extractors.patients +import java.sql.Timestamp import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{DoubleType, IntegerType, TimestampType} -import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.types.{IntegerType, StringType, TimestampType} +import org.apache.spark.sql.{Column, Dataset} import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientUtils.estimateBirthDateCol -import fr.polytechnique.cmap.cnam.etl.patients.Patient - -private[patients] object McocePatients { - - implicit class McocePatientsImplicit(mce: DataFrame) { - - def calculateBirthYear: DataFrame = { - val win = Window.partitionBy("patientID") - - val birthYear = min(col("event_year") - col("age")) - .over(win) - .as("birth_year") - - mce.groupBy("patientID", "age") - .agg(max(year(col("event_date"))).as("event_year")) - .select(col("patientID"), birthYear) - .distinct - } - - def groupByIdAndAge: DataFrame = { - mce.groupBy("patientID", "age") - .agg( - sum("sex").cast(DoubleType).as("sum_sex"), - count("sex").cast(DoubleType).as("count_sex"), - min("event_date").as("min_event_date"), - max("event_date").as("max_event_date") - ) - } - - def calculateBirthDateAndGender: DataFrame = { - val genderCol = round(sum("sum_sex") / sum("count_sex")) - .cast(IntegerType) - .as("gender") - - val birthDateCol = estimateBirthDateCol( - max("min_event_date"), min("max_event_date"), - first("birth_year") - ).as("birthDate") - - mce.groupBy("patientID") - .agg( - genderCol, - birthDateCol - ) - } +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +case class PatientMcoce(patientID: String, gender: Int, age: Int, birthDate: Timestamp, eventDate: Timestamp, deathDate: Option[Timestamp]) + extends DerivedPatient + + +private[patients] object McocePatients extends PatientExtractor[PatientMcoce] { + + /** Find birth date of patients. 
+ * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with birth date. + */ + override def findPatientBirthDate(patients: Dataset[PatientMcoce]): Dataset[PatientMcoce] = { + + val window = Window.partitionBy(col("patientID")) + val birthYear = min(col("eventYear") - col("age")) + .over(window) + .as("birthYear") + + val patientsbirthYear = patients.groupBy("patientID", "age") + .agg(max(year(col("eventDate"))).as("eventYear")) + .select(col("patientID"), birthYear) + .distinct + + val minmaxevent = patients + .groupBy(col("patientID"), col("age")) + .agg( + min("eventDate").as("minEventDate"), // the min event date for each age of a patient + max("eventDate").as("maxEventDate") // the max event date for each age of a patient + ) + + val birthDateAggCol: Column = estimateBirthDateCol( + max(col("minEventDate")).cast(TimestampType), + min(col("maxEventDate")).cast(TimestampType), + first(col("birthYear")) + ) + + val birthDate = minmaxevent.join(patientsbirthYear, "patientID") + .groupBy(col("patientID")) + .agg( + birthDateAggCol.as("birthDate")) + + import patients.sparkSession.implicits._ + + patients.as(("patients")) + .joinWith(birthDate.as("birthDateDf"), col("patients.patientID").equalTo(col("birthDateDf.patientID")), "left") + .map(p => + PatientMcoce( + p._1.patientID, + p._1.gender, + p._1.age, + p._2.getAs("birthDate"), + p._1.eventDate, + p._1.deathDate + )) } - def extract( - mcoce: DataFrame, - minGender: Int, - maxGender: Int, - minYear: Int, - maxYear: Int): Dataset[Patient] = { + /** Find gender of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with gender. 
+ */ + override def findPatientGender(patients: Dataset[PatientMcoce]): Dataset[PatientMcoce] = { + + import patients.sparkSession.implicits._ + + val genderCodeError = 9 + + val gendercount = patients + .filter(_.gender != genderCodeError) + .groupByKey(p => (p.patientID, p.age)) + .count() + .map(p => (p._1._1, p._2.toInt)) + + val gendersum = patients + .filter(_.gender != genderCodeError) + .map(p => ((p.patientID, p.age), p.gender)) + .groupByKey(_._1) + .mapValues(row => row._2) + .reduceGroups((acc, str) => acc + str) + .map(p => (p._1, p._2)) + + val sumgendercount = gendercount + .groupByKey(p => p._1) + .mapValues(row => row._2) + .reduceGroups((acc, str) => acc + str) + .map(p => (p._1, p._2)) + + val sumgendersum = gendersum + .groupByKey(p => p._1._1) + .mapValues(row => row._2) + .reduceGroups((acc, str) => acc + str) + .map(p => (p._1, p._2)) + + val result = patients.joinWith(sumgendersum, patients("patientID") === sumgendersum("_1"), "left") + + result.joinWith(sumgendercount, result("_1.patientID") === sumgendercount("_1"), "left") + .map(p => + PatientMcoce( + p._1._1.patientID, + if (p._2 != null) Math.round(p._1._2._2.toFloat / p._2._2) else 0, + p._1._1.age, + p._1._1.birthDate, + p._1._1.eventDate, + p._1._1.deathDate + )) + } - val sexCol = when( - col("MCO_FASTC__COD_SEX").cast(IntegerType) - .between(minGender, maxGender), col("MCO_FASTC__COD_SEX") - ) - .cast(IntegerType) - .as("sex") + /** Find death date of patients. + * + * @param patients that contains all patients. + * @return A [[Dataset]] of patients with death date. 
+ */ + override def findPatientDeathDate(patients: Dataset[PatientMcoce]): Dataset[PatientMcoce] = { + patients + } - val eventDateCol = when( - year(col("EXE_SOI_DTD")) - .between(minYear, maxYear), col("EXE_SOI_DTD") - ) - .cast(TimestampType) - .as("event_date") - - val ageCol = col("MCO_FASTC__AGE_ANN") - .cast(IntegerType) - .as("age") - - val inputCols = List( - col("NUM_ENQ").as("patientID"), - sexCol, - ageCol, - eventDateCol + /** Gets and prepares all the needed columns from the Sources. + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A [[Dataset]] with needed columns. + */ + override def getInput(sources: Sources): Dataset[PatientMcoce] = { + val inputColumns: List[Column] = List( + col("NUM_ENQ").cast(StringType).as("patientID"), + col("MCO_FASTC__COD_SEX").cast(IntegerType).as("gender"), + col("MCO_FASTC__AGE_ANN").cast(IntegerType).as("age"), + lit(null).cast(TimestampType).as("birthDate"), + col("EXE_SOI_DTD").cast(TimestampType).as("eventDate"), + lit(null).cast(TimestampType).as("deathDate") ) - val mcoceFiltered = mcoce.select(inputCols: _*) - val birthYears = mcoceFiltered.calculateBirthYear - - import mcoce.sparkSession.implicits._ - mcoceFiltered.groupByIdAndAge - .join(birthYears, "patientID") - .calculateBirthDateAndGender - .withColumn("deathDate", lit(null).cast(TimestampType)) - .as[Patient] + val mcoce = sources.mcoCe.get + import mcoce.sqlContext.implicits._ + mcoce.select(inputColumns: _*).as[PatientMcoce] } - } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientExtractor.scala new file mode 100644 index 00000000..89ba77bc --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientExtractor.scala @@ -0,0 +1,80 @@ +package fr.polytechnique.cmap.cnam.etl.extractors.patients + +import java.sql.Timestamp +import org.apache.spark.sql.functions._ 
+import org.apache.spark.sql.{Column, Dataset} +import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.etl.sources.Sources + +trait DerivedPatient { + val patientID: String + val gender: Int + val birthDate: Timestamp + val deathDate: Option[Timestamp] +} + +trait PatientExtractor[PatientType <: DerivedPatient] { + + /** Finds the birth date of patients. + * + * @param patients the [[Dataset]] that contains all patients. + * @return A [[Dataset]] of patients with birth date. + */ + def findPatientBirthDate(patients: Dataset[PatientType]): Dataset[PatientType] + + /** Finds the gender of patients. + * + * @param patients the [[Dataset]] that contains all patients. + * @return A [[Dataset]] of patients with gender. + */ + def findPatientGender(patients: Dataset[PatientType]): Dataset[PatientType] + + /** Finds the death date of patients. + * + * @param patients the [[Dataset]] that contains all patients. + * @return A [[Dataset]] of patients with death date. + */ + def findPatientDeathDate(patients: Dataset[PatientType]): Dataset[PatientType] + + /** Transforms derived patients into [[Patient]] by keeping only the patientID, gender, birthDate and deathDate columns. + * + * @param patients the [[Dataset]] that contains all derived patients. + * @return A [[Dataset]] of [[Patient]]. + */ + def fromDerivedPatienttoPatient(patients: Dataset[PatientType]): Dataset[Patient] = { + val outputColumns: List[Column] = List( + col("patientID"), + col("gender"), + col("birthDate"), + col("deathDate") + ) + + import patients.sqlContext.implicits._ + patients.select(outputColumns: _*).as[Patient] + } + + /** Gets and prepares all the needed columns from the Sources. + * + * @param sources Source object [[Sources]] that contains all sources. + * @return A [[Dataset]] with needed columns. + */ + def getInput(sources: Sources): Dataset[PatientType] + + /** Extracts the Patient from the Source. + * + * This function is responsible for gluing together the other parts of the Extractor. + * This method should be considered the only method to call from a Study perspective.
+ * + * @param sources Source object [[Sources]] that contains all sources. + * @return A [[Dataset]] of [[Patient]], with duplicates removed. + */ + def extract(sources: Sources): Dataset[Patient] = { + val input: Dataset[PatientType] = getInput(sources) + + input.transform(findPatientBirthDate) + .transform(findPatientGender) + .transform(findPatientDeathDate) + .transform(fromDerivedPatienttoPatient) + .distinct() + } +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala deleted file mode 100644 index 3f9f10a0..00000000 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/Patients.scala +++ /dev/null @@ -1,109 +0,0 @@ -// License: BSD 3 clause - -package fr.polytechnique.cmap.cnam.etl.extractors.patients - -import java.sql.Timestamp -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Column, DataFrame, Dataset} -import fr.polytechnique.cmap.cnam.etl.patients._ -import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.util.datetime.implicits._ -import fr.polytechnique.cmap.cnam.util.functions.makeTS - -class Patients(config: PatientsConfig) { - - import Patients.validateDeathDate - - def extract(sources: Sources): Dataset[Patient] = { - - val dcir = sources.dcir.get - val mco = sources.mco.get - val irBen = sources.irBen.get - - val mcoPatients: DataFrame = McoPatients.extract(mco, config.mcoDeathCode).toDF.as("mco") - - val irBenPatients: DataFrame = IrBenPatients.extract( - irBen, config.minYear, config.maxYear - ).toDF.as("irBen") - - val dcirPatients: DataFrame = DcirPatients.extract( - dcir, config.minGender, config.maxGender, config.minYear, config.maxYear - ).toDF.as("dcir") - - import dcirPatients.sqlContext.implicits._ - - val joinColumn: Column = coalesce(col("irBen.patientID"), col("mco.patientID")) - - val patients: DataFrame = irBenPatients - .join(mcoPatients, 
col("irBen.patientID") === col("mco.patientID"), "outer") - .join(dcirPatients, joinColumn === col("dcir.patientID"), "outer") - - val patientID: Column = coalesce( - col("dcir.patientID"), - col("irBen.patientID"), - col("mco.patientID") - ) - - val gender: Column = coalesce(col("irBen.gender"), col("dcir.gender")) - - val birthDate: Column = coalesce(col("irBen.birthDate"), col("dcir.birthDate")) - - val deathDate: Column = coalesce( - when( - validateDeathDate(col("irBen.deathDate"), birthDate, config.maxYear), - col("irBen.deathDate") - ), - when( - validateDeathDate(col("dcir.deathDate"), birthDate, config.maxYear), - col("dcir.deathDate") - ), - when( - validateDeathDate(col("mco.deathDate"), birthDate, config.maxYear), - col("mco.deathDate") - ) - ) - - val ageReferenceDate: Timestamp = config.ageReferenceDate - val age = floor(months_between(lit(ageReferenceDate), birthDate) / 12) - val filterPatientsByAge = age >= config.minAge && age < config.maxAge - - val filteredPatients = patients.where(filterPatientsByAge) - .select( - patientID.as("patientID"), - gender.as("gender"), - birthDate.as("birthDate"), - deathDate.as("deathDate") - ) - - sources.mcoCe match { - case None => filteredPatients.as[Patient] - case Some(mcoce) => - val mcocePatients = McocePatients - .extract(mcoce, config.minGender, config.maxGender, config.minYear, config.maxYear) - .toDF() - .as("mco_ce") - - val allPatients = filteredPatients.as("patients") - .join(mcocePatients, col("patients.patientID") === col("mco_ce.patientID"), "full") - - val idCol = coalesce(col("patients.patientID"), col("mco_ce.patientID")) - .alias("patientID") - val genderCol = coalesce(col("patients.gender"), col("mco_ce.gender")) - .alias("gender") - val birthDateCol = coalesce(col("patients.birthDate"), col("mco_ce.birthDate")) - .alias("birthDate") - val deathDateCol = coalesce(col("patients.deathDate"), col("mco_ce.deathDate")) - .alias("deathDate") - - allPatients.select(idCol, genderCol, birthDateCol, 
deathDateCol) - .filter(col("patientID").isNotNull && col("gender").isNotNull && col("birthDate").isNotNull) - .as[Patient] - } - } -} - -object Patients { - - def validateDeathDate(deathDate: Column, birthDate: Column, maxYear: Int): Column = - deathDate.between(birthDate, makeTS(maxYear, 1, 1)) -} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsConfig.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsConfig.scala index cdf39c84..97666bc3 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsConfig.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsConfig.scala @@ -4,6 +4,7 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import java.sql.Timestamp import java.time.LocalDate + import fr.polytechnique.cmap.cnam.etl.extractors.ExtractorConfig class PatientsConfig( @@ -14,7 +15,7 @@ class PatientsConfig( val maxYear: Int = 2020, val minGender: Int = 1, val maxGender: Int = 2, - val mcoDeathCode: Int = 9) extends ExtractorConfig + val mcoDeathCode: Int = 9) extends ExtractorConfig with Serializable object PatientsConfig { diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/patients/PatientFilters.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/patients/PatientFilters.scala new file mode 100644 index 00000000..989e810e --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/patients/PatientFilters.scala @@ -0,0 +1,76 @@ +package fr.polytechnique.cmap.cnam.etl.transformers.patients + +import java.sql.Timestamp +import java.time.Period +import org.apache.spark.sql.Dataset +import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientsConfig +import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class PatientFilters(config: PatientsConfig) extends Serializable { + + /** Filter Patient with config + 
* + * @param patients the patients to filter. + * @return [[Dataset]] of Patient + */ + def filterPatients(patients: Dataset[Patient]): Dataset[Patient] = { + import patients.sparkSession.implicits._ + patients + .flatMap(controlAge(config.minAge, config.maxAge)) + .flatMap(controlGender(config.minGender, config.maxGender)) + .flatMap(controlDeathDate(config.minYear, config.maxYear)) + } + + /** Returns None if the patient has no birth date or if the patient's age at config.ageReferenceDate is below minAge or not below maxAge, otherwise returns Some(Patient) + * + * @param patient the patient to check. + * @param minAge the minimum age (inclusive) to control with. + * @param maxAge the maximum age (exclusive) to control with. + * @return an Option of Patient + */ + protected[patients] def controlAge(minAge: Int, maxAge: Int)(patient: Patient): TraversableOnce[Patient] = { + val ageReferenceDate: Timestamp = Timestamp.valueOf(config.ageReferenceDate.atStartOfDay()) + if(patient.birthDate != null) { + val age = Period.between(patient.birthDate.toLocalDateTime.toLocalDate, ageReferenceDate.toLocalDateTime.toLocalDate).getYears + if (age >= minAge && age < maxAge) { + Some(patient) + } else { + None + } + } + else { + None + } + } + + /** Returns None if the patient has a death date that is not strictly after makeTS(minYear, 1, 1) and strictly before makeTS(maxYear, 1, 1), otherwise returns Some(Patient) + * + * @param patient the patient to check. + * @param minYear the minimum year to control with. + * @param maxYear the maximum year to control with. + * @return an Option of Patient + */ + protected[patients] def controlDeathDate(minYear: Int, maxYear: Int)(patient: Patient): TraversableOnce[Patient] = { + if (patient.deathDate.isEmpty || (patient.deathDate.get.after(makeTS(minYear, 1, 1)) && patient.deathDate.get.before(makeTS(maxYear, 1, 1)))) { + Some(patient) + } else { + None + } + } + + /** Returns None if the gender of the patient is lower than minGender or greater than maxGender, otherwise returns Some(Patient) + * + * @param patient the patient to check. + * @param minGender the minimum gender to control with.
+ * @param maxGender the maximum gender to control with. + * @return an Option of Patient + */ + protected[patients] def controlGender(minGender: Int, maxGender: Int)(patient: Patient): TraversableOnce[Patient] = { + if (patient.gender >= minGender && patient.gender <= maxGender) { + Some(patient) + } else { + None + } + } +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PatientExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PatientExtractor.scala index f753426d..38d452a8 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PatientExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/bulk/extractors/PatientExtractor.scala @@ -3,14 +3,15 @@ package fr.polytechnique.cmap.cnam.study.bulk.extractors import fr.polytechnique.cmap.cnam.etl.config.BaseConfig -import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.patients.{AllPatientExtractor, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.etl.transformers.patients.PatientFilters import fr.polytechnique.cmap.cnam.util.Path import fr.polytechnique.cmap.cnam.util.reporting.{OperationMetadata, OperationReporter, OperationTypes} class PatientExtractor(val path: String, val saveMode: String, val baseConfig: BaseConfig) { def extract(sources: Sources): List[OperationMetadata] = { - val patients = new Patients(PatientsConfig(baseConfig.studyStart)).extract(sources) + val patients = new PatientFilters(PatientsConfig(baseConfig.studyStart)).filterPatients(AllPatientExtractor.extract(sources)) List( OperationReporter .report( diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala index 04360c65..957b310d 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala +++ 
b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMain.scala @@ -8,7 +8,7 @@ import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events.{Drug, Event, FollowUp, Outcome} import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.patients.{AllPatientExtractor, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.patients.Patient @@ -16,6 +16,7 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.etl.transformers.drugprescription.DrugPrescriptionTransformer import fr.polytechnique.cmap.cnam.etl.transformers.exposures.ExposureTransformer import fr.polytechnique.cmap.cnam.etl.transformers.interaction.NLevelInteractionTransformer +import fr.polytechnique.cmap.cnam.etl.transformers.patients.PatientFilters import fr.polytechnique.cmap.cnam.study.fall.codes._ import fr.polytechnique.cmap.cnam.study.fall.config.FallConfig import fr.polytechnique.cmap.cnam.study.fall.extractors._ @@ -99,27 +100,40 @@ object FallMain extends Main with FractureCodes { None } - val optionPatients = if (fallConfig.runParameters.patients) { - val patients = new Patients(PatientsConfig(fallConfig.base.studyStart)).extract(sources).cache() + val optionAllPatients = if (fallConfig.runParameters.patients) { + val allpatients = AllPatientExtractor.extract(sources).cache() operationsMetadata += { OperationReporter .report( - "extract_patients", + "extract_raw_patients", List("DCIR", "MCO", "IR_BEN_R", "MCO_CE"), OperationTypes.Patients, - patients.toDF, + allpatients.toDF, Path(fallConfig.output.outputSavePath), fallConfig.output.saveMode ) } - Some(patients) + Some(allpatients) } 
else { None } + val filteredpatientsconfig = new PatientFilters(PatientsConfig(fallConfig.base.studyStart)).filterPatients(optionAllPatients.get).cache() + operationsMetadata += { + OperationReporter + .report( + "extract_filtered_patients", + List("DCIR", "MCO", "IR_BEN_R", "MCO_CE"), + OperationTypes.Patients, + filteredpatientsconfig.toDF, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + } + if (fallConfig.runParameters.startGapPatients) { import PatientFilters._ - val filteredPatients: Dataset[Patient] = optionPatients.get + val filteredPatients: Dataset[Patient] = filteredpatientsconfig .filterNoStartGap(optionDrugPurchases.get, fallConfig.base.studyStart, fallConfig.patients.startGapInMonths) operationsMetadata += { OperationReporter @@ -139,7 +153,7 @@ object FallMain extends Main with FractureCodes { val definition = fallConfig.exposures val patientsWithFollowUp: Dataset[(Patient, Event[FollowUp])] = FallStudyFollowUps .transform( - optionPatients.get, + filteredpatientsconfig, fallConfig.base.studyStart, fallConfig.base.studyEnd, fallConfig.patients.followupStartDelay diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala index 04c6089c..762e7a52 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtract.scala @@ -8,10 +8,11 @@ import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events.DcirAct import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor -import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.patients.{AllPatientExtractor, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits 
import fr.polytechnique.cmap.cnam.etl.patients.Patient import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.etl.transformers.patients.PatientFilters import fr.polytechnique.cmap.cnam.study.fall.codes._ import fr.polytechnique.cmap.cnam.study.fall.config.FallConfig import fr.polytechnique.cmap.cnam.study.fall.extractors._ @@ -93,21 +94,41 @@ object FallMainExtract extends Main with FractureCodes { } } - if (fallConfig.runParameters.patients) { - val patients: Dataset[Patient] = new Patients(PatientsConfig(fallConfig.base.studyStart)).extract(sources).cache() + val optionAllPatients = if (fallConfig.runParameters.patients) { + val allpatients: Dataset[Patient] = AllPatientExtractor.extract(sources).cache() meta += { - "extract_patients" -> + "extract_raw_patients" -> OperationReporter .reportAsDataSet( - "extract_patients", + "raw_patients", List("DCIR", "MCO", "IR_BEN_R", "MCO_CE"), OperationTypes.Patients, - patients, + allpatients, Path(fallConfig.output.outputSavePath), fallConfig.output.saveMode ) } + + Some(allpatients) + } else { + None } + + if (fallConfig.runParameters.patients) { + val filteredpatients: Dataset[Patient] = new PatientFilters(PatientsConfig(fallConfig.base.studyStart)).filterPatients(optionAllPatients.get).cache() + meta += { + "extract_filtered_patients" -> + OperationReporter + .reportAsDataSet( + "filtered_patients", + List("DCIR", "MCO", "IR_BEN_R", "MCO_CE"), + OperationTypes.Patients, + filteredpatients, + Path(fallConfig.output.outputSavePath), + fallConfig.output.saveMode + ) + } + } meta } diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala index 7626e0d6..40a04c03 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainTransform.scala @@ -50,7 +50,7 @@ object FallMainTransform 
extends Main with FractureCodes { val spark = SparkSession.builder.getOrCreate() import spark.implicits._ val patients: Dataset[Patient] - = spark.read.parquet(meta.get("extract_patients").get.outputPath) + = spark.read.parquet(meta.get("extract_filtered_patients").get.outputPath) .as[Patient].cache() val drugPurchases: Dataset[Event[Drug]] = spark.read.parquet(meta.get("drug_purchases").get.outputPath) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala index e45b892f..fdc56f13 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/pioglitazone/PioglitazoneMain.scala @@ -10,7 +10,7 @@ import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.events.molecules.MoleculePurchases -import fr.polytechnique.cmap.cnam.etl.extractors.patients.Patients +import fr.polytechnique.cmap.cnam.etl.extractors.patients.AllPatientExtractor import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.patients.Patient @@ -18,6 +18,7 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.etl.transformers.exposures.ExposureTransformer import fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformer import fr.polytechnique.cmap.cnam.etl.transformers.observation.ObservationPeriodTransformer +import fr.polytechnique.cmap.cnam.etl.transformers.patients.PatientFilters import fr.polytechnique.cmap.cnam.etl.transformers.tracklosses.{TracklossTransformer, TracklossesConfig} import fr.polytechnique.cmap.cnam.study.pioglitazone.extractors.{Diagnoses, MedicalActs} import 
fr.polytechnique.cmap.cnam.study.pioglitazone.outcomes._ @@ -50,7 +51,7 @@ object PioglitazoneMain extends Main { val sources = Sources.sanitize(sqlContext.readSources(config.input)) // Extraction: get all events - val rawPatients: Dataset[Patient] = new Patients(config.patients).extract(sources).cache() + val rawPatients: Dataset[Patient] = AllPatientExtractor.extract(sources).cache() operationsMetadata += { OperationReporter .report( @@ -63,6 +64,19 @@ object PioglitazoneMain extends Main { ) } + val filteredPatients: Dataset[Patient] = new PatientFilters(config.patients).filterPatients(rawPatients).cache() + operationsMetadata += { + OperationReporter + .report( + "filtered_subjects", + List("DCIR", "MCO", "IR_BEN_R"), + OperationTypes.Patients, + filteredPatients.toDF, + Path(config.output.outputSavePath), + config.output.saveMode + ) + } + val rawDrugPurchases: Dataset[Event[Molecule]] = new MoleculePurchases(config.molecules).extract(sources).cache() operationsMetadata += { OperationReporter diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala index fd7da8bf..af0ab02b 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/study/rosiglitazone/RosiglitazoneMain.scala @@ -9,7 +9,7 @@ import fr.polytechnique.cmap.cnam.Main import fr.polytechnique.cmap.cnam.etl.events._ import fr.polytechnique.cmap.cnam.etl.extractors.events.hospitalstays.McoHospitalStaysExtractor import fr.polytechnique.cmap.cnam.etl.extractors.events.molecules.MoleculePurchases -import fr.polytechnique.cmap.cnam.etl.extractors.patients.Patients +import fr.polytechnique.cmap.cnam.etl.extractors.patients.AllPatientExtractor import fr.polytechnique.cmap.cnam.etl.filters.PatientFilters import fr.polytechnique.cmap.cnam.etl.implicits import 
fr.polytechnique.cmap.cnam.etl.patients.Patient @@ -17,6 +17,7 @@ import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.etl.transformers.exposures.ExposureTransformer import fr.polytechnique.cmap.cnam.etl.transformers.follow_up.FollowUpTransformer import fr.polytechnique.cmap.cnam.etl.transformers.observation.ObservationPeriodTransformer +import fr.polytechnique.cmap.cnam.etl.transformers.patients.PatientFilters import fr.polytechnique.cmap.cnam.etl.transformers.tracklosses.{TracklossTransformer, TracklossesConfig} import fr.polytechnique.cmap.cnam.study.rosiglitazone.extractors.Diagnoses import fr.polytechnique.cmap.cnam.study.rosiglitazone.outcomes.RosiglitazoneOutcomeTransformer @@ -45,10 +46,23 @@ object RosiglitazoneMain extends Main { val sources = Sources.sanitize(sqlContext.readSources(config.input)) //Extracting Patients - val patients: Dataset[Patient] = new Patients(config.patients).extract(sources).cache() + val rawpatients: Dataset[Patient] = AllPatientExtractor.extract(sources).cache() operationsMetadata += { OperationReporter.report( - "extract_patients", + "extract_raw_patients", + List("DCIR", "MCO", "IR_BEN_R"), + OperationTypes.Patients, + rawpatients.toDF(), + Path(config.output.outputSavePath), + config.output.saveMode + ) + } + + //Extracting Patients + val patients: Dataset[Patient] = new PatientFilters(config.patients).filterPatients(rawpatients).cache() + operationsMetadata += { + OperationReporter.report( + "extract_filtered_patients", List("DCIR", "MCO", "IR_BEN_R"), OperationTypes.Patients, patients.toDF(), diff --git a/src/test/resources/test-input/HAD.parquet b/src/test/resources/test-input/HAD.parquet index 24095b429dc0d4ab761d06a740378e2e1008d2f4..bbc3928cc85233e01f17ce801c5b121b63316ff2 100644 GIT binary patch literal 59557 zcmeHwd5~nqnO|4W!2rWB3{kq z`tFvo@aza<8ILh$S(atlS&5Zcj4_t6#A1XIM(mhn6>Cf=2*pNN!A{78LZKjJg@V9X z;qS|Q`Ch)Wj(*ksk1L?2US_`he);8>Uw--Jm)ThO8Ie?hJ%(hpceOkt{+Guypwk! 
zq|++APaO_+>CfFkfOv~PV1&rZb_WB8_*+oHbtf6b_I*#akq7qc7!T#+QUpO#cyIbT zUh|PB_mMZ+DL~u00AQEDhXefN#6GBhtDHLs|3s$yBe}`}_%&QC&&6%v?SqnmgRGnx z*NX>rT-S5n#gq(%_OuU}NB;HzdEfv-Gt~o_PWtcQn2trTwV@FG4$|*V8!YXi&Om!8 zXom5|AsxmwO1e0b%COG6DX zF+D)Pp^zpyhuX*s*O|yl=`?NmeZGzdpyk(+XRk%F#u|Y1b^arq^ns&3s1?P+EV;g5 zz=tgrDL!^spqNhNNAgOMQe5a*CvRO(zIr{<<8SVS(ob&?DM<+frOP*vUmo^~(yKQL zlnTnAfzmIIkT;KbMd^{743v@vN>`4OCy#nX>HT8@rD!~3p!CEs^7EU#qV)1D0wqvd z21*~?Oy0QJD@qS_2$UjH(Lm{SKY85m6{V}6&?#LX&csL3Ci#2p6Xa(dUQv7XRueUg z321DWlsxb4gzLuBl3}`BqmOL%N1e zN9i0L<}dm>UiFcWK1sg*Nd%--V5$oSew)9&AHHxxYJ-1|_>T#54o*nd1nB(I)<$0Y z)Ny~iIssl&P?Fl*QtTKe_kW7KfBg6vd;@9?_>XgqefX~baZ>i}yZ!ODuN_gO$p#mu z>bUeLGp?WgJporClQ-e|_%`zTZGcO!0Gp{l+3@S`SX&cTkuIrUSWu%Zvj6{kNfoISDee#PFfK9K&Ql*(^99zZ~8>Z^m^d~d69}rt?Q32c4 z+sU)H12(-9#HK$nzBM4duiRlnQ%W&@pafsIgIxUwnglmc^UptHTQh47*8Hu{kjFlw z)!g)?{@R`TXum$6j#<>*FMor)^&2Jy)HJmx?>14Bm9#;XzkL^Z`Ysc--}O)MaUXp7 zCg`K;(o}h-luE3Xa+BE=d2RL@NSv6On%o|pIt)LHb5pCc+fx;#Z)@Hx>u1{TxXJQ< z;IQ}#)L>8&4+M44*QhL_LiMq)<6=9xc#2%S2O-x{FG&7HpZ|UzZ;$*uwr6y9a<;g< zl-#J67ni5E^ZmC#;&gI3Rf#L><44{P4=6@9uMQE;Tc+%oc`+CnCAYwV6em z2Hs-#Zr~=%tLBFO@U*F+&^xrDFSK{O)K1>$B$q?j&<5&<1~)A%A9m|-j^IRu^S){7 zf3}-k=r+kq(*%EbRwsC4CLbFKXZz`bOf+;aoh4V#ct!D*UY(-0gb}IzyoX%rF;TnE z=f9vy(7FB_p&KWrNBff5)$OIs=63(+`pv$ZpD0P$%ItPVT9Zp#lTvJbbPZ?Y)Rr<@ zp3cja_4(x3>fo%rVk18{nyL#Nv%Y4Qowq-yx8yp#v#9ty-QMvWh|fPHPkk1fQfKub zI$3{_kCZ!}K-*?ZiRVhW&GPx9*yzaY@Ywmm{Pt#aVX1#Ti3`iZde){v*J|$t_CX0V z_BVf9$9}!g?-V?&ao_fl_kW9g_?rmcS{2|t*1qz*0=fcxdx-p{EO{c^ zOy$q#bv?nypBzbygfqJB`HLL+L9Us~e^RioJfdVZ?w7fnyy;_-_-jX)AHu~iL$s@i%FrJ#L$OEIL?Pz$`&Puz*?!~}CR6@KmxR|=^D1^ceA<3~R7 z&M0~RcM+^h-GKide<$y>PYhlM^hS$|VdZ=!vr`ZCbCK zHz}G(qag(2pE=G@)89W$-mf%6;Nq-30oY|Q5V$f!o|(}IRQ*d_0?)jI5;znai^d}7 z^UF&cs`);GnFZhq@f33HOKamF12?&+D<;4BR`#`!dVv?5CXc3!W^N8j^oN7TV)o^z<$gL z?8m$gP*M65B#6@a?xN0VuItireguHhA@rE9<1(xS7RZYWV1?_|SSkf%h82N5DooW; z=}#gm*yp*pY>x`GFh#}I29KO4KV1S;dNqhje}YN@mCf4Vw|Y!$YU$o`arA_(5s6OF%<~$Ugvvfh#j^) z9nqh(MK7kp>l%9C_tgzI^vnQap73|p$y4hZda+6!y_gDw0DS|x<)baV>p3H{-~cHO 
zWk(_zHgr(8ys||uZ)ro{;3mfj4_)mj3FZOKr3K6A(t=O$F%04d|FI5)kBqDm9+3)c za$(-`$G=G4+ty^zw$=vQf3HsV7InHnvMI2wfOIBAMTv6YUUKKLG@jq#X#Qpyi_W1c0hg~yHYc$>jYv$See9cgK`$7#WT^f}~FZjrZ0E+LcG{CMW zFV>*atxAjf0!;e#P!B|e+cUL4|QR- ztiyxYKi%dI0(Il-HiiCxyZ6!)K$oj6vBeRESpak){@Ne;$fZA`I*V5VLP3pA=FUC( zIV9SH2FcxYEsov@%UZlA(0;dNG5+wA4hX;)pdUD+Wbep?nqkIRdn+DGnl;oOmBJv{Qbc<1Ogg=v2Cbx#EIhZZ}`Z^ z0H+B9>hhmE6EV(4SD*HgH=fpsc-M(f|64Z^?xUjrz1#$y(4f2 zh`#HeJPXk`&+4te%OB((sg5G-f3?N4xd?>d@}Hs8cHvIfU@6E7&>v77Ueu<)eeLAs ze-BW&TBG6&P{Ls;rD0)Vr0+iGBM&|YXxi?us}dBSEo1PYmwQ};#xAdWupjhwka0a2NgpNhUM9t-?V9l zGFTkZ;)s9g>-ZJelm7vj`aErhTis@T`RD)h+rMp|Cj$F1m3?;{yhC^baE!(Ke7^QJ za^XJ-y-i0IusEXh!~gOKY7W8hdl(#lKlzU+Y%Y}px-0kW`h4eQp`$t3Y+TZY z5~}CNUIxPd8Hl2D$1G>6kHXMp+(&_VXmJ`mTG~(sx6RwH2;})t&MOiGaLCNWXA|># z-+>9@J1Ap%RhEj)$k6IF3zMrYEF!!Ay8<%K7&>JdZ0tv0^^u?b7X;>RV>i$xAH60} z<~(7uwJ>GA0wMgDp(O8<1_-?TxU)s5XLjnYGbVG{ok< z{{~B;zoc!ehm5CU{?1#1EO7Q)#r*1Dfo=L%o{IUE?+cWvm;-|>V*cRwVSN2Hm%#R0 zKc47r3mjY-pPrl_^}pe3^B=yw-E;)!_O|}x&>J*IJNP0dY9U)NDTwL(7>FeemT87G z^%R9Mjm4WA($uJMQyM)(U`SJ={S9eqT)rVqji49P#8`Jrx*dHrG@O@h2lnv*Zcrik zqy|*0Pih!5dlG|(Z$I2$MK8R2Jt*nUUC&!rIv;v6NzoIx)?SX)4Ve zc`T9lzUbXEiaS-Pia{_r2QUj9V`5W{SqUo-?}Z!p&&}y2%|a3vo)k>455RL_ZDTB zU?H`29J=-Rtsjqja29!|M}^4_j`Who&*3(ChL$vT5SBD{nw2zmB$YIFu9P%(IFvMY z!jm+1Op`Qz){-=SU~ zFhUwU140@+)#N!Zn z;#HtJX)yMjG!aftnuri5tqaR0o`q3+7y8`VkmS~e43j3JfQe@TyX2#0VsJ@QVYQ^$ zNLkWs@GEIHo|SkRdP=+u3MI|Oc@i&UGfCs&mc+{tN#bQ#Bk?*Ek$4$CNS z=pl^<>3ADhL%a-zAx*@$kS0S`b{t*O!Y~fvW#|QIBATM(g@ukoZMWVPIJh*rFfqCk z_}}ASJTbE{F}XQ1zF1wGUsyTu+1q~S3wPaif<2DVyh)n+#S;so^OKOjuvV=?iK&^% z>cogKjnz7`y0wG_PpmD>j4w`1o`BK|Yx7H6TE^no?Bw_=rq55Vj!ulOjzaM-o|s&K z;xh{se797L-raNJ-g|X0MieLbmM*=66{|7;vS#lH8@Bo0y8xov-V=E|5d=FmR>ESA zw={W+VH4X*)|BZnWp-Jr4OKnEdx{W)V^2}d@Rp();bWyJWOz)G#Shs~)HA%Ls741_ zDGC{0Q>3v^b`-S?Zz-ygRaS~ZhPM>exGgJ1A;VjWYE+n&qLAS&Md5x`D^*jb_ml0+#04OF$KEM$6587{iEr>bUn zPZ649Pf^YAmLlp`oXOY~qE^Dzd~fLsH%sE3w^jf)1-wPTt$ADOrc7@s>%6r#+nDDq zU4f-m!lq1bDdXjxR+2o`M9V>8X~}N0pL&Uedi$o8q&43}T$t3GQq%$g2taoQpysy- 
z01I|^2B7D+4gdt!y8=-2TL>U)qgi+t0SCe?zjXlMz|@=X9VZLWQLG$qVLIw6b^hjL(p#w#WJmep&l=D zg&~w_1q|^pnz?*qYIJ@kK!c@kDLu>I)bQj%Al9R3w;CwXZ*GFDMRYyPI z0av)8&4sv@Umx+t^c6i{>)LjDzPWX62N$Yq+tvK~+TJMRYxlf%-n~T}VCUwYIiTma zfCH>@df`kh1X`i`U^FO%00ge%bj6_No5*{lhk!oJl&H8`Lbst-nkQczPD2?go{&=4PJ?QYsgC{Q2yCfbi0$#ptF z5JI6A0@3CjTVsu00fAstv@Hdq&c5zIS~Eq%#Te=hpZ0^pnD03kpn86*rz3r_fC+#H z`VkQD>q#6Dhy_}LK)iO-5rw&s35li*jJmRcL5i$U-H(o*L2DTk4DY%GZpC!ug?jl5 z=C_2tdV`E3eIegO-;-|Ochxx37Yoz}K*LpQ&V(KoW&*2DGUm5{cund;f%?cdHU`9d zejGc%T*z}E`n5v#7>xNQ@*WIAsK2H%u$E~F4xzuT;s_Xod=q_5=#4s2ORhB*uyUbx zd+I`g`pCP8BR&1FJUP0$xJ*w-RA*FT+q-nPJDDWA^_e3sp*8F=tj-zofT$O%c3a_~QX`Xs4Kk}FbF zk}6WTLepevL(Jd6L`BOFSGL$QeA8NJKGeA($&v*B(n^p9z675k_+Q=_8t9%I8ca!2 zHnj=QXA|?)wc%K~Yp^rDJ{B*pjxD5SGBfhnU;!>_ombPUy-Q>B`RZh>yq%a`tPGr9 znHigpt_~LxXA-gQ&{%9^B@s@}j}DYqhNHb}Lj$2ImK$3ruZ)GEo&|Xt-k%;1^#&&g zWtx{*D6fqUq}PXIrJ4S)Gy`Q<$K%zhiFkE`)*DW)56v%Chfe2M{y;f6G?1U3h$)B< zaF^GHI(z3v2f7!KR-#%i11N2CGA<87}#8y0-hUiR+EWbDNpHBy7R+J z!vmXhgPq;eV*@4NgEUi`o0juYY)g=~VLTRv{)}!9p3ZlV18=d+#6b5F@(TEh{RFf^ z8g-r&zRKDOGBqf7i)|R|Or9SeNC#Pds5%jgf*j|kH93n1R}yhS zE}`#=Rp4Ezm-U$q@A9SY(qJ$Ie`RR*apg+g8K*vFxT2Zj%&y|f6w1k|5{wNeT*bL+ zVK4+^g2~Bl^imMbX5A6xA(o!v4eS3f_R#19fapmWxvwMIG z^|~vrk`gNJ0WKw1&b#8uE78&(;EEK3(^JJc!mD#K8OE;nc7Xl7$!OOc zpSfUfVlb4B4n%tk`7*OJc5D7rZHi2Z%{?~!qjM|FVal$~>!7px=)CTPtD24$s_ym% z*kr-fc7o!zvFql?V!hK7k)ej?a+G(cxrxo0dD&%dS{hr(ZKv3rYGVUc*CryVtp@Ci zVlkAQa>G;#_7(<%yNW3~pp>V8ZW8F0@@$^Ak*)GjzT8_Z2CGx~XkMXhwi}yZvrjZ< zpF5081KpToUzVq%FfOx&a<5WWh8*P|ZB%bo8|}sawYl78?6NgK z+D*Ydv7NFd7$3QioKn}$Zde`5?pA*bYkW7Hx{qWJ)`rxFqxPrMd}5G~-CesH`_Rdz zngf{)^+rm{rta&pn~!(qGqayl7OK^uL1wGjjF;}=fhe-GyPr*2jgDoQ$=bUxU@cicb&|t1J&H5f_oBdzS%*E92 zreQ72^_HFF&xEzF7-Bw8yK&rVtmSGH_`2!n6BDowl#s8A^Bl7)ScAQeelinQSeNdd z3{bv?WXJW18EYx(yx!amz5;_-Kf10Wj zYaw6S*ek5zyi%P4{|8(9+4#nFc}*&?4ui4fEUyMVZ{5dfX1!1MJ#y9FsoY#1*Rs)) z<62hJ!?aC#rJSSuZ(_bO$wyuLAUdDYS=fgGK6N{;%N)zvVQs}+ug|Z56MT!{pRO&d zv|Zp!t2oMPEBhb&b1<0Bs(u~onnQ1s0(*HV(@Y23@pRra&wDJZd1WFq7?R~;ZCO?F 
zL0Eq(QKl>Gu(r1U`QBI^alG+GLVinYg5ye7T3wvURg-`>UB}v?J6x*vLi?Jw#YCTUgZ2%j4f7UTW4ZEmE{XddahLc7 z5r@rsT=Xl{73-ntrXr_Z?M;^yws%$+R-wF|E;HGNHkeJeMPvRBDg)Gh2!?WX*t&M* z?Z%tQ_O|+HOzmn+CcxGK+n_W6c2{2MW#h+fEn!#Q&i1L{SK1-wD9jsjDa6)^Zn9BQ z$}n$4i!SqW%jB5R4&j;tu%_X1QyL58X8-2$r8LGnfnN+T2YWUh^;zrr3-%)D;BvZH zRQG{v^I2%Cv;9_s-#Z$EX8r+kjDfVq+-bIHNJd!_+Uo3kXyqEoEGw#?vk_Tgd+~cS z76tw>zi1<}l4j$=PWQE3RWJ7F6gZeosm!j#SVEeksL?Ks(?P9^E zwwniAYSV*#46!9fd(4%$HsyC+hFLsJZf=qKM_gr>#nyxiP3yPj?IQ5?SFu0B4%G($1#3^O<3EPO5^a*?sep8YuE3be(gO~u;YRMVDC8Z zsTyR`>Yv_$Ex>6*EPZ``D6L(hjN91+H87jjppC8PDb{-D18kR2wh62|;jdB8yW1_s zvQ6l&9qIu=&%@qY9X0{AADDdx`onoYco$@WV%O}GI{0j|eVUlhcC@af`kwkx1=)19 z6PX+{8JSD=IqJ0<>v%(7)NeP)7=J^3s5SS8qZoe^0Uyxb><^b=e>j+y;S8Ea-;u92 zW$pAs>()2OtMU-=N z<(q!CGg!{jvE$BXYx&*hHJm}oqYt%4oQozGid&pZOJz7DC*bUCUE{5!Ov4#bh#z1! z+-~Y!lwXLiiGn=BzJ>ezW>>ZetWB%QitI8AM8 zf}c&#xk!HiVjN*SVT^Zux~n{gvO;6X_egi;VSa!;GuYp&wKoBK0?;;yHF4jYXxaLV z($`}hYQ_vWmEYa@7x=5L4{F5>Al{wJZ*%&9(ryH&sjMAO4P)NArmSOq7Mj328ir0M zAyz!UK0X8YKHypVSaoK6KDsrD_XLEK8p^H<)Qqd0##-3S*pNuWE_93z=ax8d;@ZzKKL2K=S(FF^>sy{{%NTP z>f3;4sf5p`;VuuUFPDMxD-ozajnCPQG}Na=pq3dah~?o~8p7w(aAyaU2axhc1QN6O zELGq+k_*Gnd1*zGq8t5?IF8S!r9~_ccZSG|cvnb2K4+yQ;0cDIz6og#Uai1$Bp8O5 zR`4#6*aqGOvWU;!(m1?di6cE3>Pz8sHW$P8z#Syg6y6o0Nbn3L;CT~SoyY;JB;g7?M-~zKd8ko}Rj?^5_9B%Oiuf#7;5jmmuP$O$ zu^d*lh|g!>htv;0BpDdXFG;~?Y4L3K)G7E^mZqU%giasu8$aR7*>##PN!Yu{75K3I zIy(Nf`BRETR!*J5>Xk4s0zR}p>|XXs&#tk2=yaL_eF#7CJ`RI^09f;xZMe?`vU4RA zR0$i-QRA-}%-_UX^!J6cHK;$KCEFZN6lL0ir=RA7ObPAL=L=}5_P%|4;zi&LWhZM7 zjtLbH*mG0n`A>?>uDbH4KVqy8$AC~D_uGK3mBurtRsJME(5(D9lfnCNFb78?(_rcy`LI|D{R~lMkEz2iqCWC!t+{F(OhX;@-aK?1q6~IEKacaoZMBW0|wA z{U4_J#`odw2#7h#XLHc*LXGz`m@mi=_LoI|;@hgcI>>XjXl#$E|IKSRsh`WgVlR(< zG+!8B+S-J!Z>3C)_u2R|y`PT2xmFsNF3Gd1d^xly8QFB}9~RREYY*t35|^)>z5H!u z;|BS|IYX5{Jl|vX-$=7H>XWnm_11^JG-<0weYra8gS}H)pPY@T@@U`QDN}toXIa{d zc79IkQOA>$z76(3FTvY3;|%;j*<9;~o9m8BR%EyC)e(1^;4g5Fr 
zi|?1C{h^Krhw&l}^)i0)@nUHo-mL+9-(WP(y0~IOubU=2Ig}?>`}agyzVJ9+;{@5FOeVk=)xyx$9(7`s7gCq9m>LfMJ5mDC*WVXn3EIg_;Hdi$%Mz)f5G7-__tvH8}^_f1_Jb@ zQlq|}I_iV7-*A?S)(85Bjc*5hQrDes`j_fUdZq-%JRg4!{L0XLQ~QMTJh1lwtXxs$ zS(fuumzd;J8pnLoe6HW85G)sq<&-qU$CrbBQKI=q`$9Vl1;b*l#`_hg_u+0tv~M1iT9u)$!+`FO%D64EoY! z@8MZj^#2V&{eyh`IqH}>vZ{RM?AsGrSN$9I?ZG<~07_AnPlxs>{h$i$`v>pB!22aYi7(#C^_%^C*LjNR zH}I_)^+B#Q5v6f;(4GaYJ@)b&&8qED*Bj`Vlj8KA5d0-=Ywz2ym)2?i3Y3(M_n+h< zT^zr?K8(=z(ff}TSbxI(Uw8*%u1v>2I6;s<4*FOwVm|svCBZj>_tr=yJ{}#$dyeMQ z@ovOhrgsTRl^XAl(|pQr>-|E2XQjsbi!>iyV=}!zD>qlpfc@{@-W1KJu1oE{C}5K+ z&kplVWCQgH`jDk4y@N};2TKZXtK-RuzcionSEHx$XCSMNX9s&Hn1!YY`jFCB^t<5j zE;A{i(YId@NMM5-*7H=pg!}H`E;g81c>BZXxVHF*BN?a!*DvrfzYq82Rlujf^qUNS z_X0n3eL&@(KH}Gq1X(e5pEgT0w+HXklftv=cy^et`oKqHXb(gVnizcla@3X*AzB%y(O4Zyx^8>Fu^P#^bc(s_MH3f z*Sz;6kKRn#4{EIO+>i6S=bU@)x#!+<<%B=!Q$1>(S{H$TJT6zI%jNRb!5?3J1OHxM z=Tlv3wOV~tt%Poy)yf0St|OJCAGGPlH=qDwH~duWQYxyTV^|GGpf`nWkYMTeMjXQU z-c$!e>{Is@4#C>=N=D6xa97-L!5EEdBORjxnihMyHp4%`p-8Z6a1(s%P4|q3ZICOV zr@q&R!l4V2qgue*J9W|ZCP?uhC};o+|f)N~SPp;8E_d$88{ z)LciT6322?fQZ}FZ4^m?2Gmm_j8dm7bpSyA{Tr#v_5WW@#Old{-3k*vm7N>TP0ZE# z)#Fq6l}&O{INp(G5vQEgeX}cunB*x5V1Cw3%Gz! 
z1%xPBAg~F40Ad|At=aTHwUmFL{(w5PJs{H9)IiU0ceFj<-IYts2J$0Ypt65pAh(nq zsDiKQ(Sf;6xjHg|VK=nW3d* zWFJrrb<9Ku!=d?}Y~Of$ZfJ12cXkqa4Rl3^$3r8j-roLTS1vz1X~WPyJ7Rs0h1AU0 z|2emMNl`B`P3{6|`iHWe9f`T6srbdENOpcNR(1OmgCk3Ee?HK?nDe*IXYaa{jI zqI-A>9YlWGKQli%G&eXhwCKOMxX>Ah zQESv1YFZd^w)##sHDbuucJesJY6Z5{3>J#@`Xa`qWKF)CdK@#XCPUJDOW;8Erk+KQ_3y zkVnB1Q;Xdx{4$@LKD%UtyW32qZz426Vk{FnRIOS|#oPu>+4N*fXnHU{JBHXJ=ci}0 zHZpG_4f;0Go1>!c%5K^obxa+T6mq_;ueB{Wot&9kxX|lAmq*U~0-YnB?TN|mK2+Jl zQrCt4!QoWz&{Tf0KR98hAafsmw;7j9MRi3^5hT#VtrQt0C9yTG<*n~gpqpukfuFI1%s*)w~s4uE7O5UVz z2go;-?_9djH+FtFJ2I8Vm6}Yo<)=Vuvv=81&{N+;`ayrm1*{{qY^JDU%glSQRwti0 zGor+YSbwI=Gk}cJZ_6gu>K^<=HbqHhQ$}$vtc>D3LRxoD3OA-Yhyx z1vKt}NZdt&BBVNjR&b2$Ot?3iZ3lKX_c7Dfv<_qIG|5VFuG)15aMtu!XUl_!bh^9Z zxh|aO3c_U``|4}3yB64jy`V`aU!c1*hv0{$pbUffq2YEsQw)dZ!&AvvA~iD>jK|NN zUC84BWIi7b&Rm=uS_tQZBiVGmqifM-p7+{x(<=a&4@|>cY9PjCgzovLfB)G_#;3OV zh*V-^3U&a7o7HBj1N2k_kJqLLub$RE_QbgC(h?Ff-xKL&cYu0CPK>4AJh<88|E`2mNWeyuBqJ&cq7GAa&+(h`6yX|d^6UiP{y_d4VmRl$lO|iCWQyX21Xu@oeR)34IWY)j6J%*D6>~|SjksDzB z8Vx7{8$B9NXJUW_Mnyb;JVY#z%s@+_ez6kdHnhXwX=Nsw@UPVX%13NQkpLS`pHF0h zak?0pT!~Oi#-BY4WA&kP=?vQdA8K++n~KD4kkENz)YbJzr6 z!FWr7=Ab$e_8Sc00Bqo7$PZpTAR7U6B}!xJVO)x6ouA7Ask;R=NMc{;w{0D-Wnu`_V_b&+xLn@biU8!H3BkIrX!jLpCtx=DejBi_-2#m7hA>A2@1~;NRJ>oi z5w9T2Zv%A0BY;>eL_n@6+7-on!-F6!zi&govdKgbE0WM(Q?zS}cV*M*b#nYRcy~8T zcr^ARc=Ou4_wMG?aJ-gN{Wb`9w#W!ZB*hW#Y$*a^MVfkut4Msu5mqXTK)AV8;sKo( z1S%Zi=GGz*-rXi4&=`^#MR?!029v%5KNQ`z&_h+3)U)u zeYO|C#KQsv6Qu&Mr~7Qc;1I&q`*fdKy*E@_5@URvksGR+vQRO}F=V&!h5+dNN?6xYdIKUp}Xj0gQlP^&s&`rl$cmSRd68>nEWK zIo}cxV*~%F1|7m$?*nYWKK|ALbsLR)Sd2_h$OQJDqTN%xAAjrg1|A9640G+U{V;Im zCc`{bw1(1y>E6YSluu@`7-# zOo#L?f>W)$VCL}CHK^dafm3~Ip?dc_Vp)&_0o>29+YCbXm7;y6c;EdFaa`*@Y_PG0 zA0D+zLhnR4Pd_|rmgHlgf)!CrT-mXO?<6(pi#Fg*bL%_8Sfp|mF-cY<(zDq#J>j)c=zt3_G88SCm&+lyoazsU2d>WIlW*a z+~T5KZZJ>zU5SeEBHci7%DWcI)io&T6&0skwNS1!3i+9GaeG%9iE@j0 zOJSpt51Pep!3>k$hmj%PRi;wJWY$l{TIRb5LNsmtOElGY^0i4sP5^}Ab1UF8tH?<6@d*S>G&6)TcauZlQheCBd_-d$>3yREF-~d#PC6-#Q~!yyec7AbzB1A1U5jXK=aOzbdg= 
zGEe-1O=euujV%}OiQlZ)>j6SZ78?_gUcC}3Bc!|@00GgEY~P^RF!Yla!H|F@x15}r zpR|~nd2CRbm?5{U5;KomHptA=pq-2W+Z~l)p9TR|JXyg7BuaKGdNm|qh0^)WwFJEy z+91bowh3siKI58Of_<|MU^f!zUk!^{Vf&4Idrq)%@m>v^r{+G8vSX#p5nQhmDvU?& z0~N$0?XMqzaB?(vcC1Pv8;OuDWh>9+(P;kNDG{@$c+)I z>xy<=@jeAQ$O=iXh-_wP`DFq{PQ1jP3iD|M?vh)(XCzze@+&5diZJNi<@Qw1q&NSAg@~d=@Jm6=YRqQ{9JAWV(=G*t))Uho=-q* zSQ|_hc%WDUiu9;apn#{}nBNw!AWH**$DpMFK+exO04@?-^TxwW=Qz?+OneOC9Ej~^ zC4mU3oHHUxsx6wtD-RI=je_#R)t48pp}cU7 z<%Mf14;&;1O9$e=i6NjIu5Kw2D6pK`EX@s;1Bm1t$^!%$h%y0{bhfA456i}shbtrP zR|fG6Ao5!eht}@|B$7aU{P8fn;E`wPVTscM~d4;lAC(Tfli6>mg z@XJVLLo3%}Hr5VF`IpF>$Rd^w#1e}F$%(mRijMv^R>?qe`cQy5GSkA$p#UpEMED)C zl5uE$P~bQfj-?DsM=H+}#B5{PkmO8Zi2{n`p#XAX1BIExlCjF8Dk(E4I4aVjUU}W6 zchI!pvGJLM^SPPX;mL`EzFJ>hZQa2mefiu-OH(SXYnfd7LH{=eq`1{I{Di2wiq diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/AllPatientExtractorSuite.scala similarity index 53% rename from src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsSuite.scala rename to src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/AllPatientExtractorSuite.scala index 32e7dc65..f82bdc37 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/PatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/AllPatientExtractorSuite.scala @@ -8,39 +8,37 @@ import fr.polytechnique.cmap.cnam.etl.patients.Patient import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.util.functions._ -class PatientsSuite extends SharedContext { +class AllPatientExtractorSuite extends SharedContext { "isDeathDateValid" should "remove absurd deathDate" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val df: DataFrame = Seq( - (makeTS(1989, 3, 13), makeTS(2029, 3, 13)), - (makeTS(1989, 3, 13), makeTS(2009, 3, 13)), - (makeTS(1989, 3, 13), makeTS(1979, 3, 13)) - 
).toDF("birthDate", "deathDate") + val ds: Dataset[Patient] = Seq( + Patient("Patient_01", 1, makeTS(1989, 3, 13), Some(makeTS(2009, 3, 13))), + Patient("Patient_02", 2, makeTS(1989, 3, 13), Some(makeTS(1979, 3, 13))) + ).toDS() - val deathDates: Column = df("deathDate") - val birthDates: Column = df("birthDate") + val deathDates: Column = ds("deathDate") + val birthDates: Column = ds("birthDate") val expected = 1 // When - val result = df - .filter(Patients.validateDeathDate(deathDates, birthDates, 2020) === true) + val result = ds + .filter(AllPatientExtractor.validateDeathDate(deathDates, birthDates) === true) .count // Then assert(result == expected) } - "transform" should "return the correct data in a Dataset[Patient] for a known input" in { + "extract" should "return the correct data in a Dataset[Patient] for a known input" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val config = PatientsConfig(ageReferenceDate = makeTS(2006, 1, 1)) val dcirDf: DataFrame = Seq( ("Patient_01", 2, 31, 1945, Some(makeTS(2006, 1, 15)), None), ("Patient_01", 2, 31, 1945, Some(makeTS(2006, 1, 30)), None), @@ -59,11 +57,6 @@ class PatientsSuite extends SharedContext { ("Patient_04", 3, 5, 1995) ).toDF("NUM_ENQ", "MCO_B__SOR_MOD", "SOR_MOI", "SOR_ANN") - val ssrDf: DataFrame = Seq( - "Patient_01", - "Patient_05" - ).toDF("SSR_C__NUM_ENQ") - val irBenDf: DataFrame = Seq( ("Patient_01", 1, 1, 1945, None), ("Patient_02", 1, 2, 1956, Some(makeTS(2009, 3, 13))), @@ -91,17 +84,69 @@ class PatientsSuite extends SharedContext { dcir = Some(dcirDf), mco = Some(mcoDf), irBen = Some(irBenDf), - mcoCe = Some(mcoceDf), - ssr = Some(ssrDf) + mcoCe = Some(mcoceDf) + ) + + // When + val result = AllPatientExtractor.extract(sources) + val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 1, makeTS(1945, 1, 1), None), + Patient("Patient_02", 1, makeTS(1956, 2, 1), Some(makeTS(2009, 3, 13))), + Patient("Patient_03", 2, makeTS(1937, 3, 1), Some(makeTS(1980, 4, 1))), + 
Patient("Patient_04", 2, makeTS(1966, 2, 1), Some(makeTS(2020, 3, 13))), + Patient("Patient_05", 1, makeTS(1935, 4, 1), Some(makeTS(2008, 3, 13))), + Patient("Patient_06", 1, makeTS(1920, 8, 1), Some(makeTS(1980, 8, 1))) + ).toDS() + + // Then + assertDSs(result, expected) + } + + "extractBis" should "return the correct data in a Dataset[Patient] without MCO_CE" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val dcirDf: DataFrame = Seq( + ("Patient_01", 2, 31, 1945, Some(makeTS(2006, 1, 15)), None), + ("Patient_01", 2, 31, 1945, Some(makeTS(2006, 1, 30)), None), + ("Patient_02", 1, 47, 1959, Some(makeTS(2006, 1, 15)), Some(makeTS(2009, 3, 13))), + ("Patient_02", 1, 47, 1959, Some(makeTS(2006, 1, 30)), Some(makeTS(2009, 3, 13))), + ("Patient_03", 1, 47, 1959, Some(makeTS(2006, 1, 30)), None), + ("Patient_04", 1, 51, 1966, Some(makeTS(2006, 1, 5)), Some(makeTS(2009, 3, 13))), + ("Patient_04", 1, 51, 1966, Some(makeTS(2006, 2, 5)), None), + ("Patient_04", 2, 51, 1966, Some(makeTS(2006, 3, 5)), None) + ).toDF("NUM_ENQ", "BEN_SEX_COD", "BEN_AMA_COD", "BEN_NAI_ANN", "EXE_SOI_DTD", "BEN_DCD_DTE") + + val mcoDf: DataFrame = Seq( + ("Patient_01", 1, 2, 1985), + ("Patient_02", 9, 3, 1986), + ("Patient_03", 9, 4, 1980), + ("Patient_04", 3, 5, 1995) + ).toDF("NUM_ENQ", "MCO_B__SOR_MOD", "SOR_MOI", "SOR_ANN") + + val irBenDf: DataFrame = Seq( + ("Patient_01", 1, 1, 1945, None), + ("Patient_02", 1, 2, 1956, Some(makeTS(2009, 3, 13))), + ("Patient_03", 2, 3, 1937, Some(makeTS(1936, 3, 13))), + ("Patient_04", 2, 2, 1966, Some(makeTS(2020, 3, 13))), + ("Patient_05", 1, 4, 1935, Some(makeTS(2008, 3, 13))), + ("Patient_06", 1, 8, 1920, Some(makeTS(1980, 8, 1))) + ).toDF("NUM_ENQ", "BEN_SEX_COD", "BEN_NAI_MOI", "BEN_NAI_ANN", "BEN_DCD_DTE") + + val sources = new Sources( + dcir = Some(dcirDf), + mco = Some(mcoDf), + irBen = Some(irBenDf) ) // When - val result = new Patients(config).extract(sources) + val result = AllPatientExtractor.extract(sources) val 
expected: Dataset[Patient] = Seq( Patient("Patient_01", 1, makeTS(1945, 1, 1), None), Patient("Patient_02", 1, makeTS(1956, 2, 1), Some(makeTS(2009, 3, 13))), Patient("Patient_03", 2, makeTS(1937, 3, 1), Some(makeTS(1980, 4, 1))), - Patient("Patient_04", 2, makeTS(1966, 2, 1), Some(makeTS(2009, 3, 13))), + Patient("Patient_04", 2, makeTS(1966, 2, 1), Some(makeTS(2020, 3, 13))), Patient("Patient_05", 1, makeTS(1935, 4, 1), Some(makeTS(2008, 3, 13))), Patient("Patient_06", 1, makeTS(1920, 8, 1), Some(makeTS(1980, 8, 1))) ).toDS() diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatientsSuite.scala index cbd21efb..798a477d 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/DcirPatientsSuite.scala @@ -2,140 +2,160 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import java.sql.{Date, Timestamp} -import org.apache.spark.sql.DataFrame +import java.sql.Timestamp +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.etl.sources.Sources class DcirPatientsSuite extends SharedContext { - import fr.polytechnique.cmap.cnam.etl.extractors.patients.DcirPatients.DcirPatientsDataFrame + "findPatientBirthDate" should "return a Dataset with the birth date for each patient" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val input: Dataset[PatientDcir] = Seq( + PatientDcir("Patient_01", 2, 31, "1975", null, Timestamp.valueOf("2006-01-15 00:00:00"), None), + PatientDcir("Patient_02", 1, 47, "1959", null, Timestamp.valueOf("2006-01-05 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))) + ).toDS() + + val expected: Dataset[PatientDcir] = Seq( + PatientDcir("Patient_01", 2, 
31, "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Timestamp.valueOf("2006-01-15 00:00:00"), None), + PatientDcir("Patient_02", 1, 47, "1959", Timestamp.valueOf("1959-01-01 00:00:00"), Timestamp.valueOf("2006-01-05 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))) + ).toDS() + + // When + val result = DcirPatients.findPatientBirthDate(input) + + // Then + assertDSs(result, expected) + } + + "findGender" should "return a Dataset with the correct gender for each patient" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val input: Dataset[PatientDcir] = Seq( + PatientDcir("Patient_01", 2, 31, "1975", null, Timestamp.valueOf("2006-01-15 00:00:00"), None), + PatientDcir("Patient_01", 1, 31, "1975", null, Timestamp.valueOf("2006-01-15 00:00:00"), None), + PatientDcir("Patient_01", 2, 31, "1975", null, Timestamp.valueOf("2006-01-30 00:00:00"), None), + PatientDcir("Patient_02", 1, 47, "1975", null, Timestamp.valueOf("2006-01-05 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1975", null, Timestamp.valueOf("2006-01-15 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1975", null, Timestamp.valueOf("2006-01-30 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1975", null, Timestamp.valueOf("2006-01-30 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))) + ).toDS() + + val expected: Dataset[PatientDcir] = Seq( + PatientDcir("Patient_01", 2, 31, "1975", null, Timestamp.valueOf("2006-01-15 00:00:00"), None), + PatientDcir("Patient_01", 2, 31, "1975", null, Timestamp.valueOf("2006-01-15 00:00:00"), None), + PatientDcir("Patient_01", 2, 31, "1975", null, Timestamp.valueOf("2006-01-30 00:00:00"), None), + PatientDcir("Patient_02", 1, 47, "1975", null, Timestamp.valueOf("2006-01-05 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1975", null, 
Timestamp.valueOf("2006-01-15 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1975", null, Timestamp.valueOf("2006-01-30 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1975", null, Timestamp.valueOf("2006-01-30 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))) + ).toDS() + + // When + val result = DcirPatients.findPatientGender(input) - "findBirthYears" should "return a DataFrame with the birth year for each patient" in { + // Then + assertDSs(result, expected) + } + + "findDeathDate" should "return a Dataset with the correct death date for each patient" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input: DataFrame = Seq( - ("Patient_01", "1975"), - ("Patient_01", "1975"), - ("Patient_01", "000000"), - ("Patient_01", "999999"), - ("Patient_01", "2075"), - ("Patient_01", "1975"), - ("Patient_02", "1959"), - ("Patient_02", "1959"), - ("Patient_02", "9999"), - ("Patient_02", "9999") - ).toDF("patientID", "birthYear") - - val expectedResult: DataFrame = Seq( - ("Patient_01", "1975"), - ("Patient_02", "1959") - ).toDF("patientID", "birthYear") + val input: Dataset[PatientDcir] = Seq( + PatientDcir("Patient_01", 1, 1, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(null.asInstanceOf[Timestamp])), + PatientDcir("Patient_02", 1, 34, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 30, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2015-03-13 00:00:00"))), + PatientDcir("Patient_03", 1, 1, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("1976-03-13 00:00:00"))), + PatientDcir("Patient_04", 1, 45, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2020-03-13 00:00:00"))) + ).toDS() + + val expected: Dataset[PatientDcir] = Seq( + PatientDcir("Patient_01", 
1, 1, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(null.asInstanceOf[Timestamp])), + PatientDcir("Patient_04", 1, 45, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2020-03-13 00:00:00"))), + PatientDcir("Patient_03", 1, 1, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("1976-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 34, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 30, "1975", null, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))) + ).toDS() // When - val result = input.findBirthYears + val result = DcirPatients.findPatientDeathDate(input) // Then - assertDFs(result, expectedResult) + assertDSs(result, expected) } - "groupByIdAndAge" should "return a DataFrame with data aggregated by patient ID and age" in { + "convert PatientDcirtoPatient" should "return Dataset of Patients" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val givenDf: DataFrame = sqlContext.read.parquet("src/test/resources/expected/DCIR.parquet") - val input: DataFrame = Seq( - ("Patient_01", 31, 2, Date.valueOf("2006-01-15"), None), - ("Patient_01", 31, 2, Date.valueOf("2006-01-15"), None), - ("Patient_01", 31, 2, Date.valueOf("2006-01-30"), None), - ("Patient_02", 47, 1, Date.valueOf("2006-01-05"), Some(Date.valueOf("2009-03-13"))), - ("Patient_02", 47, 1, Date.valueOf("2006-01-15"), Some(Date.valueOf("2009-03-13"))), - ("Patient_02", 47, 1, Date.valueOf("2006-01-30"), Some(Date.valueOf("2009-03-13"))), - ("Patient_02", 47, 1, Date.valueOf("2006-01-30"), Some(Date.valueOf("2009-03-13"))) - ).toDF("patientID", "age", "gender", "eventDate", "deathDate") - - val expected: DataFrame = Seq( - ("Patient_01", 31, 3L, 6L, Date.valueOf("2006-01-15"), Date.valueOf("2006-01-30"), - None), - ("Patient_02", 47, 4L, 4L, Date.valueOf("2006-01-05"), 
Date.valueOf("2006-01-30"), - Some(Date.valueOf("2009-03-13"))) - ).toDF( - "patientID", "age", "genderCount", "genderSum", "minEventDate", "maxEventDate", - "deathDate" - ) + val input: Dataset[PatientDcir] = Seq( + PatientDcir("Patient_01", 2, 45, "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Timestamp.valueOf("1975-01-01 00:00:00"), None), + PatientDcir("Patient_02", 1, 49, "1959", Timestamp.valueOf("1959-01-01 00:00:00"), Timestamp.valueOf("1959-01-01 00:00:00"), Some(Timestamp.valueOf("2008-01-25 00:00:00"))) + ).toDS() + + val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 2, Timestamp.valueOf("1975-01-01 00:00:00"), None), + Patient("Patient_02", 1, Timestamp.valueOf("1959-01-01 00:00:00"), Some(Timestamp.valueOf("2008-01-25 00:00:00"))) + ).toDS() // When - val result = input.groupByIdAndAge + val result = DcirPatients.fromDerivedPatienttoPatient(input) // Then - assertDFs(result, expected) + assertDSs(result, expected) } - "estimateFields" should "return a Dataset[Patient] from a DataFrame with aggregated data" in { + "getInput" should "read file" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input: DataFrame = Seq( - ("Patient_01", 31, 3, 6, "1975", Date.valueOf("2006-01-15"), - Date.valueOf("2006-01-30"), None), - ("Patient_02", 47, 4, 4, "1959", Date.valueOf("2006-01-05"), - Date.valueOf("2006-01-30"), Some(Date.valueOf("2009-03-13"))) - ).toDF( - "patientID", "age", "genderCount", "genderSum", "birthYear", "minEventDate", - "maxEventDate", "deathDate" - ) - - val expected: DataFrame = Seq( - Patient( - patientID = "Patient_01", - gender = 2, - birthDate = Timestamp.valueOf("1975-01-01 00:00:00"), - deathDate = None - ), - Patient( - patientID = "Patient_02", - gender = 1, - birthDate = Timestamp.valueOf("1959-01-01 00:00:00"), - deathDate = Some(Timestamp.valueOf("2009-03-13 00:00:00")) - ) - ).toDF + val dcir = spark.read.parquet("src/test/resources/test-input/DCIR.parquet") + val sources = Sources(dcir = 
Some(dcir)) + + val expected: Dataset[PatientDcir] = Seq( + PatientDcir("Patient_01", 2, 31, "1975", null, null, null), + PatientDcir("Patient_01", 2, 31, "1975", null, Timestamp.valueOf("2006-01-15 00:00:00"), null), + PatientDcir("Patient_01", 2, 31, "1975", null, Timestamp.valueOf("2006-01-30 00:00:00"), null), + PatientDcir("Patient_02", 1, 47, "1959", null, Timestamp.valueOf("2006-01-15 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1959", null, Timestamp.valueOf("2006-01-30 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1959", null, Timestamp.valueOf("2006-01-30 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientDcir("Patient_02", 1, 47, "1959", null, Timestamp.valueOf("2006-01-05 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))) + ).toDS() // When - val result = input.estimateFields.toDF + val result = DcirPatients.getInput(sources) // Then - assertDFs(result, expected) + assertDSs(result, expected) } - "transform" should "return the correct data in a Dataset[Patient] for a known input" in { + "extract" should "build patients with actual data" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val dcir: DataFrame = sqlCtx.read.parquet("src/test/resources/expected/DCIR.parquet") - val expected: DataFrame = Seq( - Patient( - patientID = "Patient_01", - gender = 2, - birthDate = Timestamp.valueOf("1975-01-01 00:00:00"), - deathDate = None - ), - Patient( - patientID = "Patient_02", - gender = 1, - birthDate = Timestamp.valueOf("1959-01-01 00:00:00"), - deathDate = Some(Timestamp.valueOf("2009-03-13 00:00:00")) - ) - ).toDF + val dcir = spark.read.parquet("src/test/resources/test-input/DCIR.parquet") + val sources = Sources(dcir = Some(dcir)) + + val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 2, Timestamp.valueOf("1975-01-01 00:00:00"), None), + Patient("Patient_02", 1, Timestamp.valueOf("1959-01-01 
00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))) + ).toDS() // When - val result = DcirPatients.extract(dcir, 1, 2, 1900, 2020).toDF + val result = DcirPatients.extract(sources) // Then - assertDFs(result, expected) + assertDSs(result, expected) } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatientsSuite.scala index 2af3bf26..d6409102 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/HadPatientsSuite.scala @@ -1,95 +1,141 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import java.sql.Timestamp -import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.functions._ +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.etl.sources.Sources class HadPatientsSuite extends SharedContext { - import fr.polytechnique.cmap.cnam.etl.extractors.patients.HadPatients.HadPatientsDataFrame + "findBirthDate" should "return the same dataset" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val input: Dataset[PatientHad] = Seq( + PatientHad("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientHad("Patient_02", 9, "03", "2010", 2, null, Some(Timestamp.valueOf("2010-03-01 00:00:00"))) + ).toDS() - "getDeathDates" should "collect death dates correctly from flat HAD" in { + val expected: Dataset[PatientHad] = Seq( + PatientHad("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientHad("Patient_02", 9, "03", "2010", 2, null, Some(Timestamp.valueOf("2010-03-01 00:00:00"))) + ).toDS() + + // When + val result: Dataset[PatientHad] = HadPatients.findPatientBirthDate(input) + + 
// Then + assertDSs(result, expected) + } + + "findGender" should "return the same dataset" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input: DataFrame = Seq( - ("Patient_01", 1, 2, 1983), - ("Patient_02", 9, 3, 1986) - ).toDF("patientID", "SOR_MOD", "SOR_MOI", "SOR_ANN") + val input: Dataset[PatientHad] = Seq( + PatientHad("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientHad("Patient_02", 9, "03", "2010", 2, null, Some(Timestamp.valueOf("2010-03-01 00:00:00"))) + ).toDS() - val expected: DataFrame = Seq( - ("Patient_02", Timestamp.valueOf("1986-03-01 00:00:00")) - ).toDF("patientID", "deathDate") + val expected: Dataset[PatientHad] = Seq( + PatientHad("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientHad("Patient_02", 9, "03", "2010", 2, null, Some(Timestamp.valueOf("2010-03-01 00:00:00"))) + ).toDS() // When - val result: DataFrame = input.getDeathDates(9).select(col("patientID"), col("deathDate")) + val result: Dataset[PatientHad] = HadPatients.findPatientGender(input) // Then - assertDFs(result, expected) + assertDSs(result, expected) } - it should "choose minimum death date if a patient has more than one death dates" in { + "findDeathDate" should "choose minimum death date if a patient has more than one death dates" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input: DataFrame = Seq( - ("Patient_01", 9, 2, 1985), - ("Patient_01", 9, 4, 1980) - ).toDF("patientID", "SOR_MOD", "SOR_MOI", "SOR_ANN") + val input: Dataset[PatientHad] = Seq( + PatientHad("Patient_01", 1, "2", "1985", 1, null, None), + PatientHad("Patient_02", 9, "3", "1986", 1, null, Some(Timestamp.valueOf("1986-03-01 00:00:00"))), + PatientHad("Patient_03", 9, "4", "1980", 1, null, Some(Timestamp.valueOf("1980-04-01 00:00:00"))), + PatientHad("Patient_03", 9, "4", "1984", 1, null, Some(Timestamp.valueOf("1984-04-01 00:00:00"))), + 
PatientHad("Patient_04", 3, "5", "1995", 1, null, None) + ).toDS() + + val expected: Dataset[PatientHad] = Seq( + PatientHad("Patient_02", 9, "3", "1986", 1, null, Some(Timestamp.valueOf("1986-03-01 00:00:00"))), + PatientHad("Patient_03", 9, "4", "1980", 1, null, Some(Timestamp.valueOf("1980-04-01 00:00:00"))) + ).toDS() - val expected: DataFrame = Seq( - ("Patient_01", Timestamp.valueOf("1980-04-01 00:00:00")) - ).toDF("patientID", "deathDate") + // When + val result: Dataset[PatientHad] = HadPatients.findPatientDeathDate(input) + + // Then + assertDSs(result, expected) + } + + "convert PatientHadtoPatient" should "return Dataset of Patients" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val input: Dataset[PatientHad] = Seq( + PatientHad("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientHad("Patient_02", 9, "03", "2000", 2, null, Some(Timestamp.valueOf("2000-03-01 00:00:00"))) + ).toDS() + + val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + Patient("Patient_02", 2, null, Some(Timestamp.valueOf("2000-03-01 00:00:00"))) + ).toDS() // When - val result: DataFrame = input.getDeathDates(9).select(col("patientID"), col("deathDate")) + val result = HadPatients.fromDerivedPatienttoPatient(input) // Then - assertDFs(result, expected) + assertDSs(result, expected) } - "transform" should "return correct Dataset" in { + "getInput" should "read file" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val had: DataFrame = Seq( - ("Patient_01", 1, 2, 1980), - ("Patient_02", 9, 3, 1986), - ("Patient_03", 9, 4, 1980), - ("Patient_03", 9, 4, 1984), - ("Patient_04", 3, 5, 1995) - ).toDF("NUM_ENQ", "HAD_B__SOR_MOD", "HAD_B__SOR_MOI", "HAD_B__SOR_ANN") - - val expected: DataFrame = Seq( - ("Patient_02", Timestamp.valueOf("1986-03-01 00:00:00")), - ("Patient_03", Timestamp.valueOf("1980-04-01 00:00:00")) - 
).toDF("patientID", "deathDate") + val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") + val sources = Sources(had = Some(had)) + + val expected: Dataset[PatientHad] = Seq( + PatientHad("patient01", 8, "1", "2019", 0, null, Some(Timestamp.valueOf("2019-01-01 00:00:00"))), + PatientHad("patient01", -1, "1", "2019", 0, null, Some(Timestamp.valueOf("2019-01-01 00:00:00"))), + PatientHad("patient02", 0, "1", "2019", 0, null, Some(Timestamp.valueOf("2019-01-01 00:00:00"))), + PatientHad("patient02", 0, "1", "2019", 0, null, Some(Timestamp.valueOf("2019-01-01 00:00:00"))) + ).toDS() // When - val result = HadPatients.extract(had) + val result = HadPatients.getInput(sources) // Then - assertDFs(result.toDF, expected) + assertDSs(result, expected) } - "extract" should "extract target HadPatients" in { + "extract" should "build patients with actual data" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given val had = spark.read.parquet("src/test/resources/test-input/HAD.parquet") + val sources = Sources(had = Some(had)) - val result = HadPatients.extract(had) + val expected: Dataset[Patient] = Seq.empty[Patient].toDS() - val expected: DataFrame = Seq.empty[ - (String, Timestamp) - ].toDF("patientID", "deathDate") + // When + val result = HadPatients.extract(sources) // Then assertDSs(result, expected) } + } \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatientsSuite.scala index 2c701559..69c6c377 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/IrBenPatientsSuite.scala @@ -3,157 +3,144 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import java.sql.Timestamp -import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.Dataset import 
fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.etl.sources.Sources class IrBenPatientsSuite extends SharedContext { - import fr.polytechnique.cmap.cnam.etl.extractors.patients.IrBenPatients.IrBenPatientsDataFrame - - "getBirthDates" should "collect birth dates correctly from IR_BEN_R" in { + "findBirthDate" should "return correct birth dates" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val irBen: DataFrame = Seq( - ("Patient_01", 1, 1975), - ("Patient_02", 2, 1976), - ("Patient_03", 3, 1977), - ("Patient_04", 4, 1895) - ).toDF("patientID", "BEN_NAI_MOI", "BEN_NAI_ANN") - - val expected: DataFrame = Seq( - ("Patient_01", Timestamp.valueOf("1975-01-01 00:00:00")), - ("Patient_02", Timestamp.valueOf("1976-02-01 00:00:00")), - ("Patient_03", Timestamp.valueOf("1977-03-01 00:00:00")) - ).toDF("patientID", "birthDate") + val irBen: Dataset[PatientIrBen] = Seq( + PatientIrBen("Patient_01", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(null.asInstanceOf[Timestamp])), + PatientIrBen("Patient_02", 2, "2", "1976", Timestamp.valueOf("1976-02-01 00:00:00"), Some(null.asInstanceOf[Timestamp])) + ).toDS() - // When - val result = irBen.getBirthDate() + val expected: Dataset[PatientIrBen] = Seq( + PatientIrBen("Patient_01", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(null.asInstanceOf[Timestamp])), + PatientIrBen("Patient_02", 2, "2", "1976", Timestamp.valueOf("1976-02-01 00:00:00"), Some(null.asInstanceOf[Timestamp])) + ).toDS() - // Then - assertDFs(result, expected) - } - - it should "throw an exception in case of conflicting birth dates" in { - val sqlCtx = sqlContext - import sqlCtx.implicits._ + //When + val result = IrBenPatients.findPatientBirthDate(irBen) - // Given - val irBen: DataFrame = Seq( - ("Patient_01", 1, 1975), - ("Patient_01", 2, 1976) - ).toDF("patientID", "BEN_NAI_MOI", "BEN_NAI_ANN") - - // Then - 
intercept[Exception] { - irBen.getBirthDate() - } + //Then + assertDSs(result, expected) } - "getGender" should "return correct gender" in { + "findGender" should "return correct gender" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input: DataFrame = Seq( - ("Patient_01", 1), - ("Patient_02", 2), - ("Patient_02", 2) - ).toDF("patientID", "BEN_SEX_COD") + val input: Dataset[PatientIrBen] = Seq( + PatientIrBen("Patient_01", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(null.asInstanceOf[Timestamp])), + PatientIrBen("Patient_02", 2, "2", "1976", Timestamp.valueOf("1976-02-01 00:00:00"), Some(null.asInstanceOf[Timestamp])) + ).toDS() - val expected: DataFrame = Seq( - ("Patient_01", 1), - ("Patient_02", 2) - ).toDF("patientID", "gender") + val expected: Dataset[PatientIrBen] = Seq( + PatientIrBen("Patient_01", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(null.asInstanceOf[Timestamp])), + PatientIrBen("Patient_02", 2, "2", "1976", Timestamp.valueOf("1976-02-01 00:00:00"), Some(null.asInstanceOf[Timestamp])) + ).toDS() // When - val result = input.getGender + val result = IrBenPatients.findPatientGender(input) // Then - assertDFs(result, expected) + assertDSs(result, expected) } - it should "throw an exception in case of conflicting sex code" in { + "findDeathDate" should "find death dates correctly" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input: DataFrame = Seq( - ("Patient_01", 1), - ("Patient_01", 2) - ).toDF("patientID", "BEN_SEX_COD") + val input: Dataset[PatientIrBen] = Seq( + PatientIrBen("Patient_01", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(null.asInstanceOf[Timestamp])), + PatientIrBen("Patient_02", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientIrBen("Patient_02", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2015-03-13 00:00:00"))), + 
PatientIrBen("Patient_03", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("1976-03-13 00:00:00"))), + PatientIrBen("Patient_04", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2020-03-13 00:00:00"))) + ).toDS() + + val expected: Dataset[PatientIrBen] = Seq( + PatientIrBen("Patient_01", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(null.asInstanceOf[Timestamp])), + PatientIrBen("Patient_04", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2020-03-13 00:00:00"))), + PatientIrBen("Patient_03", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("1976-03-13 00:00:00"))), + PatientIrBen("Patient_02", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientIrBen("Patient_02", 1, "1", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))) + ).toDS() + + // When + val result = IrBenPatients.findPatientDeathDate(input) // Then - intercept[Exception] { - input.getGender - } + assertDSs(result, expected) } - "getDeathDate" should "collect death dates correctly from IR_BEN_R" in { + "convert PatientIrBentoPatient" should "return Dataset of Patients" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val irBen: DataFrame = Seq( - ("Patient_01", None), - ("Patient_02", Some(Timestamp.valueOf("2009-03-13 00:00:00"))), - ("Patient_03", Some(Timestamp.valueOf("1976-03-13 00:00:00"))), - ("Patient_04", Some(Timestamp.valueOf("2020-03-13 00:00:00"))) - ).toDF("patientID", "BEN_DCD_DTE") - - val expected: DataFrame = Seq( - ("Patient_04", Timestamp.valueOf("2020-03-13 00:00:00")), - ("Patient_03", Timestamp.valueOf("1976-03-13 00:00:00")), - ("Patient_02", Timestamp.valueOf("2009-03-13 00:00:00")) - ).toDF("patientID", "deathDate") + val input: Dataset[PatientIrBen] = Seq( + PatientIrBen("Patient_01", 1, "1", "1975", 
Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + PatientIrBen("Patient_02", 2, "3", "1977", Timestamp.valueOf("1977-03-01 00:00:00"), Some(null.asInstanceOf[Timestamp])) + ).toDS() + + val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 1, Timestamp.valueOf("1975-01-01 00:00:00"), Some(Timestamp.valueOf("2009-03-13 00:00:00"))), + Patient("Patient_02", 2, Timestamp.valueOf("1977-03-01 00:00:00"), Some(null.asInstanceOf[Timestamp])) + ).toDS() // When - val result = irBen.getDeathDate + val result = IrBenPatients.fromDerivedPatienttoPatient(input) // Then - assertDFs(result, expected) + assertDSs(result, expected) } - "transform" should "return correct result" in { + "getInput" should "read file" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val irBen: DataFrame = Seq( - ("Patient_01", 1, 1, 1975, Timestamp.valueOf("2009-03-13 00:00:00")), - ("Patient_02", 2, 3, 1977, null.asInstanceOf[Timestamp]), - ("Patient_02", 2, 4, 1895, null.asInstanceOf[Timestamp]) - ).toDF("NUM_ENQ", "BEN_SEX_COD", "BEN_NAI_MOI", "BEN_NAI_ANN", "BEN_DCD_DTE") + val irBen = spark.read.parquet("src/test/resources/test-input/IR_BEN_R.parquet") + val sources = Sources(irBen = Some(irBen)) - val expected: DataFrame = Seq( - ("Patient_01", 1, Timestamp.valueOf("1975-01-01 00:00:00"), Timestamp.valueOf("2009-03-13 00:00:00")), - ("Patient_02", 2, Timestamp.valueOf("1977-03-01 00:00:00"), null.asInstanceOf[Timestamp]) - ).toDF("patientID", "gender", "birthDate", "deathDate") + val expected: Dataset[PatientIrBen] = Seq( + PatientIrBen("Patient_01", 2, "01", "1975", Timestamp.valueOf("1975-01-01 00:00:00"), null), + PatientIrBen("Patient_02", 1, "10", "1959", Timestamp.valueOf("1959-10-01 00:00:00"), Some(Timestamp.valueOf("2008-01-25 00:00:00"))) + ).toDS() // When - val result = IrBenPatients.extract(irBen, 1900, 2020) + val result = IrBenPatients.getInput(sources) // Then - assertDFs(result.toDF, expected) + 
assertDSs(result, expected) } - it should "deal with actual data" in { + "extract" should "build patients with actual data" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val irBen = sqlCtx.read.load("src/test/resources/expected/IR_BEN_R.parquet") + val irBen = spark.read.parquet("src/test/resources/test-input/IR_BEN_R.parquet") + val sources = Sources(irBen = Some(irBen)) - val expected: DataFrame = Seq( - ("Patient_01", 2, Timestamp.valueOf("1975-01-01 00:00:00"), null.asInstanceOf[Timestamp]), - ("Patient_02", 1, Timestamp.valueOf("1959-10-01 00:00:00"), Timestamp.valueOf("2008-01-25 00:00:00")) - ).toDF("patientID", "gender", "birthDate", "deathDate") + val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 2, Timestamp.valueOf("1975-01-01 00:00:00"), null), + Patient("Patient_02", 1, Timestamp.valueOf("1959-10-01 00:00:00"), Some(Timestamp.valueOf("2008-01-25 00:00:00"))) + ).toDS() // When - val result = IrBenPatients.extract(irBen, 1900, 2020) + val result = IrBenPatients.extract(sources) // Then - assertDFs(result.toDF, expected) + assertDSs(result, expected) } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatientsSuite.scala index e5a6feee..c7ca45cf 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McoPatientsSuite.scala @@ -3,78 +3,142 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients import java.sql.Timestamp -import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.functions._ +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.etl.sources.Sources class McoPatientsSuite extends SharedContext { - import 
fr.polytechnique.cmap.cnam.etl.extractors.patients.McoPatients.McoPatientsDataFrame + "findBirthDate" should "return the same dataset" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val input: Dataset[PatientMco] = Seq( + PatientMco("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientMco("Patient_02", 9, "03", "2010", 2, null, Some(Timestamp.valueOf("2010-03-01 00:00:00"))) + ).toDS() + + val expected: Dataset[PatientMco] = Seq( + PatientMco("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientMco("Patient_02", 9, "03", "2010", 2, null, Some(Timestamp.valueOf("2010-03-01 00:00:00"))) + ).toDS() + + // When + val result: Dataset[PatientMco] = McoPatients.findPatientBirthDate(input) + + // Then + assertDSs(result, expected) + } - "getDeathDates" should "collect death dates correctly from flat MCO" in { + "findGender" should "return the same dataset" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input: DataFrame = Seq( - ("Patient_01", 1, 2, 1985), - ("Patient_02", 9, 3, 1986) - ).toDF("patientID", "SOR_MOD", "SOR_MOI", "SOR_ANN") + val input: Dataset[PatientMco] = Seq( + PatientMco("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientMco("Patient_02", 9, "03", "2010", 2, null, Some(Timestamp.valueOf("2010-03-01 00:00:00"))) + ).toDS() - val expected: DataFrame = Seq( - ("Patient_02", Timestamp.valueOf("1986-03-01 00:00:00")) - ).toDF("patientID", "deathDate") + val expected: Dataset[PatientMco] = Seq( + PatientMco("Patient_01", 9, "01", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientMco("Patient_02", 9, "03", "2010", 2, null, Some(Timestamp.valueOf("2010-03-01 00:00:00"))) + ).toDS() // When - val result: DataFrame = input.getDeathDates(9).select(col("patientID"), col("deathDate")) + val result: Dataset[PatientMco] = McoPatients.findPatientGender(input) // 
Then - assertDFs(result, expected) + assertDSs(result, expected) } - it should "choose minimum death date if a patient has more than one death dates" in { + "findDeathDate" should "choose minimum death date if a patient has more than one death dates" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val input: DataFrame = Seq( - ("Patient_01", 9, 2, 1985), - ("Patient_01", 9, 4, 1980) - ).toDF("patientID", "SOR_MOD", "SOR_MOI", "SOR_ANN") + val input: Dataset[PatientMco] = Seq( + PatientMco("Patient_01", 1, "2", "1985", 1, null, None), + PatientMco("Patient_02", 9, "3", "1986", 1, null, Some(Timestamp.valueOf("1986-03-01 00:00:00"))), + PatientMco("Patient_03", 9, "4", "1980", 1, null, Some(Timestamp.valueOf("1980-04-01 00:00:00"))), + PatientMco("Patient_03", 9, "4", "1984", 1, null, Some(Timestamp.valueOf("1984-04-01 00:00:00"))), + PatientMco("Patient_04", 3, "5", "1995", 1, null, None) + ).toDS() + + val expected: Dataset[PatientMco] = Seq( + PatientMco("Patient_02", 9, "3", "1986", 1, null, Some(Timestamp.valueOf("1986-03-01 00:00:00"))), + PatientMco("Patient_03", 9, "4", "1980", 1, null, Some(Timestamp.valueOf("1980-04-01 00:00:00"))) + ).toDS() + + // When + val result: Dataset[PatientMco] = McoPatients.findPatientDeathDate(input) - val expected: DataFrame = Seq( - ("Patient_01", Timestamp.valueOf("1980-04-01 00:00:00")) - ).toDF("patientID", "deathDate") + // Then + assertDSs(result, expected) + } + + "convert PatientMcotoPatient" should "return Dataset of Patients" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val input: Dataset[PatientMco] = Seq( + PatientMco("Patient_01", 9, "1", "2009", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + PatientMco("Patient_02", 9, "3", "2000", 2, null, Some(Timestamp.valueOf("2000-03-01 00:00:00"))) + ).toDS() + + val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 1, null, Some(Timestamp.valueOf("2009-01-01 00:00:00"))), + Patient("Patient_02", 2, null, 
Some(Timestamp.valueOf("2000-03-01 00:00:00"))) + ).toDS() // When - val result: DataFrame = input.getDeathDates(9).select(col("patientID"), col("deathDate")) + val result = McoPatients.fromDerivedPatienttoPatient(input) // Then - assertDFs(result, expected) + assertDSs(result, expected) } - "transform" should "return correct Dataset" in { + "getInput" should "read file" in { val sqlCtx = sqlContext import sqlCtx.implicits._ // Given - val mco: DataFrame = Seq( - ("Patient_01", 1, 2, 1985), - ("Patient_02", 9, 3, 1986), - ("Patient_03", 9, 4, 1980), - ("Patient_03", 9, 4, 1984), - ("Patient_04", 3, 5, 1995) - ).toDF("NUM_ENQ", "MCO_B__SOR_MOD", "SOR_MOI", "SOR_ANN") - - val expected: DataFrame = Seq( - ("Patient_02", Timestamp.valueOf("1986-03-01 00:00:00")), - ("Patient_03", Timestamp.valueOf("1980-04-01 00:00:00")) - ).toDF("patientID", "deathDate") + val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") + val sources = Sources(mco = Some(mco)) + + val expected: Dataset[PatientMco] = Seq( + PatientMco("Patient_02", 5, "2", "2007", 0, null, Some(Timestamp.valueOf("2007-02-01 00:00:00"))), + PatientMco("Patient_02", 5, "2", "2007", 0, null, Some(Timestamp.valueOf("2007-02-01 00:00:00"))), + PatientMco("Patient_02", 5, "1", "2006", 0, null, Some(Timestamp.valueOf("2006-01-01 00:00:00"))), + PatientMco("Patient_02", 5, "1", "2006", 0, null, Some(Timestamp.valueOf("2006-01-01 00:00:00"))), + PatientMco("Patient_02", 5, "3", "2008", 0, null, Some(Timestamp.valueOf("2008-03-01 00:00:00"))), + PatientMco("Patient_02", 5, "3", "2008", 0, null, Some(Timestamp.valueOf("2008-03-01 00:00:00"))) + ).toDS() + + // When + val result = McoPatients.getInput(sources) + + // Then + assertDSs(result, expected) + } + + "extract" should "build patients with actual data" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val mco = spark.read.parquet("src/test/resources/test-input/MCO.parquet") + val sources = Sources(mco = Some(mco)) + + val 
expected: Dataset[Patient] = Seq.empty[Patient].toDS() // When - val result = McoPatients.extract(mco) + val result = McoPatients.extract(sources) // Then - assertDFs(result.toDF, expected) + assertDSs(result, expected) } } \ No newline at end of file diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatientsSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatientsSuite.scala index 9a501dc0..36e17bfc 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatientsSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/patients/McocePatientsSuite.scala @@ -2,133 +2,184 @@ package fr.polytechnique.cmap.cnam.etl.extractors.patients -import org.apache.spark.sql.functions.lit -import org.apache.spark.sql.types.TimestampType +import java.sql.Timestamp +import org.apache.spark.sql.Dataset import fr.polytechnique.cmap.cnam.SharedContext -import fr.polytechnique.cmap.cnam.etl.implicits -import fr.polytechnique.cmap.cnam.etl.extractors.patients.McocePatients.McocePatientsImplicit +import fr.polytechnique.cmap.cnam.etl.patients.Patient import fr.polytechnique.cmap.cnam.etl.sources.Sources -import fr.polytechnique.cmap.cnam.study.fall.config.FallConfig -import fr.polytechnique.cmap.cnam.util.functions.makeTS class McocePatientsSuite extends SharedContext { - "calculateBirthYear" should "return a DataFrame with the birth year for each patient" in { - + "findPatientBirthDate" should "return a Dataset with the birth date for each patient" in { val sqlCtx = sqlContext import sqlCtx.implicits._ - //Given - val input = Seq( - ("200410", 1, 79, makeTS(2014, 4, 18)), - ("2004100010", 1, 73, makeTS(2014, 1, 9)), - ("2004100010", 1, 73, makeTS(2014, 2, 11)), - ("2004100010", 1, 74, makeTS(2014, 7, 18)), - ("2004100010", 1, 74, makeTS(2014, 12, 12)), - ("2004100010", 1, 74, makeTS(2014, 4, 15)), - ("2004100010", 1, 74, makeTS(2014, 10, 27)), - ("2004100010", 1, 74, 
makeTS(2014, 4, 4)), - ("2004100010", 1, 74, makeTS(2014, 11, 6)), - ("2004100010", 1, 74, makeTS(2014, 5, 2)), - ("2004100010", 1, 74, makeTS(2014, 9, 26)) - ).toDF("patientID", "sex", "age", "event_date") - - val expected = Seq( - ("200410", 1935), - ("2004100010", 1940) - ).toDF("patientID", "birth_year") - - //When - val result = input.calculateBirthYear - - //Then - assertDFs(result, expected) + // Given + val input: Dataset[PatientMcoce] = Seq( + PatientMcoce("200410", 1, 79, null, Timestamp.valueOf("2014-04-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, null, Timestamp.valueOf("2014-01-09 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, null, Timestamp.valueOf("2014-02-11 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-07-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-12-12 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-04-15 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-10-27 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-04-04 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-11-06 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-05-02 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-09-26 00:00:00"), None) + ).toDS() + + val expected: Dataset[PatientMcoce] = Seq( + PatientMcoce("200410", 1, 79, Timestamp.valueOf("1935-04-01 00:00:00"), Timestamp.valueOf("2014-04-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-01-09 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-02-11 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-07-18 
00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-12-12 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-04-15 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-10-27 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-04-04 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-11-06 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-05-02 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-09-26 00:00:00"), None) + ).toDS() + + // When + val result = McocePatients.findPatientBirthDate(input) + + // Then + assertDSs(result, expected) } - "groupByIdAndAge" should "return a DataFrame with data aggregated by patient ID and age" in { - + "findGender" should "return a Dataset with the correct gender for each patient" in { val sqlCtx = sqlContext import sqlCtx.implicits._ - //Given - val input = Seq( - ("200410", 1, 79, makeTS(2014, 4, 18)), - ("2004100010", 1, 73, makeTS(2014, 1, 9)), - ("2004100010", 1, 73, makeTS(2014, 2, 11)), - ("2004100010", 1, 74, makeTS(2014, 7, 18)), - ("2004100010", 1, 74, makeTS(2014, 12, 12)), - ("2004100010", 1, 74, makeTS(2014, 4, 15)), - ("2004100010", 1, 74, makeTS(2014, 10, 27)), - ("2004100010", 1, 74, makeTS(2014, 4, 4)), - ("2004100010", 1, 74, makeTS(2014, 11, 6)), - ("2004100010", 1, 74, makeTS(2014, 5, 2)), - ("2004100010", 1, 74, makeTS(2014, 9, 26)) - ).toDF("patientID", "sex", "age", "event_date") - - val expected = Seq( - ("200410", 79, 1.0, 1.0, makeTS(2014, 4, 18), makeTS(2014, 4, 18)), - ("2004100010", 73, 2.0, 2.0, makeTS(2014, 1, 9), 
makeTS(2014, 2, 11)), - ("2004100010", 74, 8.0, 8.0, makeTS(2014, 4, 4), makeTS(2014, 12, 12)) - ).toDF("patientID", "age", "sum_sex", "count_sex", "min_event_date", "max_event_date") - - //When - val result = input.groupByIdAndAge - - //Then - assertDFs(result, expected) - + // Given + val input: Dataset[PatientMcoce] = Seq( + PatientMcoce("200410", 1, 79, Timestamp.valueOf("1935-04-01 00:00:00"), Timestamp.valueOf("2014-04-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-01-09 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-02-11 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-07-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-12-12 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-04-15 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-10-27 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-04-04 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-11-06 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-05-02 00:00:00"), None), + PatientMcoce("2004100010", 2, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-09-26 00:00:00"), None) + ).toDS() + + val expected: Dataset[PatientMcoce] = Seq( + PatientMcoce("200410", 1, 79, Timestamp.valueOf("1935-04-01 00:00:00"), Timestamp.valueOf("2014-04-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, Timestamp.valueOf("1940-03-01 00:00:00"), 
Timestamp.valueOf("2014-01-09 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-02-11 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-07-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-12-12 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-04-15 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-10-27 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-04-04 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-11-06 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-05-02 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, Timestamp.valueOf("1940-03-01 00:00:00"), Timestamp.valueOf("2014-09-26 00:00:00"), None) + ).toDS() + + // When + val result = McocePatients.findPatientGender(input) + + // Then + assertDSs(result, expected) } - "calculateBirthDateAndGender" should "return a DataFrame with aggregated data" in { - + "findDeathDate" should "return the same dataset" in { val sqlCtx = sqlContext import sqlCtx.implicits._ - //Given - val input = Seq( - ("200410", 79, 1.0, 1.0, makeTS(2014, 4, 18), makeTS(2014, 4, 18), 1935), - ("2004100010", 73, 2.0, 2.0, makeTS(2014, 1, 9), makeTS(2014, 2, 11), 1940), - ("2004100010", 74, 8.0, 8.0, makeTS(2014, 4, 4), makeTS(2014, 12, 12), 1940) - ).toDF("patientID", "age", "sum_sex", "count_sex", "min_event_date", "max_event_date", "birth_year") - val expected = Seq( - ("200410", 1, makeTS(1935, 4, 1)), - ("2004100010", 1, makeTS(1940, 3, 1)) - ).toDF("patientID", 
"gender", "birthDate") + // Given + val input: Dataset[PatientMcoce] = Seq( + PatientMcoce("Patient_01", 2, 45, Timestamp.valueOf("1975-01-01 00:00:00"), Timestamp.valueOf("2020-01-01 00:00:00"), None), + PatientMcoce("Patient_02", 1, 50, Timestamp.valueOf("1959-01-01 00:00:00"), Timestamp.valueOf("2010-01-01 00:00:00"), None) + ).toDS() - //When - val result = input.calculateBirthDateAndGender + val expected: Dataset[PatientMcoce] = Seq( + PatientMcoce("Patient_01", 2, 45, Timestamp.valueOf("1975-01-01 00:00:00"), Timestamp.valueOf("2020-01-01 00:00:00"), None), + PatientMcoce("Patient_02", 1, 50, Timestamp.valueOf("1959-01-01 00:00:00"), Timestamp.valueOf("2010-01-01 00:00:00"), None) + ).toDS() - //Then - assertDFs(result, expected) + // When + val result: Dataset[PatientMcoce] = McocePatients.findPatientDeathDate(input) + // Then + assertDSs(result, expected) } - "extract" should "return the correct data in a Dataset[Patient] for a known input" in { + "convert PatientMcocetoPatient" should "return Dataset of Patients" in { val sqlCtx = sqlContext import sqlCtx.implicits._ - //Given - val fallConfig = FallConfig.load("", "test") - val patientsConfig = PatientsConfig(fallConfig.base.studyStart) - import implicits.SourceReader - val sources = Sources.sanitize(sqlContext.readSources(fallConfig.input)) - val expected = Seq( - ("200410", 1, makeTS(1935, 4, 1)), - ("2004100010", 1, makeTS(1940, 3, 1)) - ).toDF("patientID", "gender", "birthDate") - .withColumn("deathDate", lit(null).cast(TimestampType)) + // Given + val input: Dataset[PatientMcoce] = Seq( + PatientMcoce("Patient_01", 2, 45, Timestamp.valueOf("1975-01-01 00:00:00"), Timestamp.valueOf("2020-01-01 00:00:00"), None), + PatientMcoce("Patient_02", 1, 50, Timestamp.valueOf("1959-01-01 00:00:00"), Timestamp.valueOf("2010-01-01 00:00:00"), None) + ).toDS() + val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 2, Timestamp.valueOf("1975-01-01 00:00:00"), None), + Patient("Patient_02", 1, 
Timestamp.valueOf("1959-01-01 00:00:00"), None) + ).toDS() - //When - val result = McocePatients.extract( - sources.mcoCe.get, patientsConfig.minGender, - patientsConfig.maxGender, patientsConfig.minYear, patientsConfig.maxYear - ).toDF() + // When + val result = McocePatients.fromDerivedPatienttoPatient(input) - //Then - assertDFs(result, expected) + // Then + assertDSs(result, expected) + } + + "getInput" should "read file" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + // Given + val mcoce = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") + val sources = Sources(mcoCe = Some(mcoce)) + + val expected: Dataset[PatientMcoce] = Seq( + PatientMcoce("200410", 1, 79, null, Timestamp.valueOf("2014-04-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, null, Timestamp.valueOf("2014-01-09 00:00:00"), None), + PatientMcoce("2004100010", 1, 73, null, Timestamp.valueOf("2014-02-11 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-07-18 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-12-12 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-04-15 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-10-27 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-04-04 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-11-06 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-05-02 00:00:00"), None), + PatientMcoce("2004100010", 1, 74, null, Timestamp.valueOf("2014-09-26 00:00:00"), None) + ).toDS() + + // When + val result = McocePatients.getInput(sources) + + // Then + assertDSs(result, expected) } + "extract" should "build patients with actual data" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val mcoce = spark.read.parquet("src/test/resources/test-input/MCO_CE.parquet") + val 
sources = Sources(mcoCe = Some(mcoce)) + val expected: Dataset[Patient] = Seq( + Patient("200410", 1, Timestamp.valueOf("1935-04-01 00:00:00"), None), + Patient("2004100010", 1, Timestamp.valueOf("1940-03-01 00:00:00"), None) + ).toDS() + + // When + val result = McocePatients.extract(sources) + + // Then + assertDSs(result, expected) + } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/patients/PatientFiltersSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/patients/PatientFiltersSuite.scala new file mode 100644 index 00000000..4a384fed --- /dev/null +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/transformers/patients/PatientFiltersSuite.scala @@ -0,0 +1,42 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.transformers.patients + +import org.apache.spark.sql.Dataset +import fr.polytechnique.cmap.cnam.SharedContext +import fr.polytechnique.cmap.cnam.etl.extractors.patients.PatientsConfig +import fr.polytechnique.cmap.cnam.etl.patients.Patient +import fr.polytechnique.cmap.cnam.util.functions.makeTS + +class PatientFiltersSuite extends SharedContext { + + "transform" should "return the correct data in a Dataset[Patient] for a known input" in { + val sqlCtx = sqlContext + import sqlCtx.implicits._ + + // Given + val config = PatientsConfig(ageReferenceDate = makeTS(2006, 1, 1)) + val input: Dataset[Patient] = Seq( + Patient("Patient_01", 1, makeTS(1945, 1, 1), None), + Patient("Patient_02", 1, makeTS(1956, 2, 1), Some(makeTS(2009, 3, 13))), + Patient("Patient_03", 2, makeTS(1937, 3, 1), Some(makeTS(1980, 4, 1))), + Patient("Patient_04", 2, makeTS(1966, 2, 1), Some(makeTS(2009, 3, 13))), + Patient("Patient_05", 1, makeTS(1935, 4, 1), Some(makeTS(2020, 3, 13))), + Patient("Patient_06", 3, makeTS(1920, 8, 1), Some(makeTS(1980, 8, 1))), + Patient("Patient_07", 3, makeTS(2000, 8, 1), Some(makeTS(1980, 8, 1))) + ).toDS() + + // When + val result = new PatientFilters(config).filterPatients(input) + 
val expected: Dataset[Patient] = Seq( + Patient("Patient_01", 1, makeTS(1945, 1, 1), None), + Patient("Patient_02", 1, makeTS(1956, 2, 1), Some(makeTS(2009, 3, 13))), + Patient("Patient_03", 2, makeTS(1937, 3, 1), Some(makeTS(1980, 4, 1))), + Patient("Patient_04", 2, makeTS(1966, 2, 1), Some(makeTS(2009, 3, 13))) + ).toDS() + + // Then + assertDSs(result, expected) + } + +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtractorTransformSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtractorTransformSuite.scala index 90cb6fad..a5ef1ae7 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtractorTransformSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainExtractorTransformSuite.scala @@ -2,16 +2,17 @@ package fr.polytechnique.cmap.cnam.study.fall +import org.apache.spark.sql.{Encoders, SparkSession} import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.events._ -import fr.polytechnique.cmap.cnam.etl.extractors.patients.{Patients, PatientsConfig} +import fr.polytechnique.cmap.cnam.etl.extractors.patients.{AllPatientExtractor, PatientsConfig} import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.patients.Patient import fr.polytechnique.cmap.cnam.etl.sources.Sources +import fr.polytechnique.cmap.cnam.etl.transformers.patients.PatientFilters import fr.polytechnique.cmap.cnam.study.fall.config.FallConfig import fr.polytechnique.cmap.cnam.study.fall.extractors._ import fr.polytechnique.cmap.cnam.util.reporting._ -import org.apache.spark.sql.{Encoders, SparkSession} class FallMainExtractorTransformSuite extends SharedContext { @@ -36,8 +37,8 @@ class FallMainExtractorTransformSuite extends SharedContext { assertDSs(new ActsExtractor(fallConfig.medicalActs).extract(sources)._1, spark.read.parquet(meta.get("acts").get.outputPath) .as(Encoders.bean(classOf[Event[MedicalAct]]))) - assertDSs(new 
Patients(PatientsConfig(fallConfig.base.studyStart)).extract(sources), - spark.read.parquet(meta.get("extract_patients").get.outputPath) + assertDSs(new PatientFilters(PatientsConfig(fallConfig.base.studyStart)).filterPatients(AllPatientExtractor.extract(sources)), + spark.read.parquet(meta.get("extract_filtered_patients").get.outputPath) .as(Encoders.bean(classOf[Patient]))) } } diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainSuite.scala index 80aec387..f14b82df 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/study/fall/FallMainSuite.scala @@ -6,7 +6,6 @@ import fr.polytechnique.cmap.cnam.SharedContext import fr.polytechnique.cmap.cnam.etl.implicits import fr.polytechnique.cmap.cnam.etl.sources.Sources import fr.polytechnique.cmap.cnam.study.fall.config.FallConfig -import org.apache.spark.sql.functions.lit class FallMainSuite extends SharedContext { @@ -45,8 +44,9 @@ class FallMainSuite extends SharedContext { import implicits.SourceReader val sources = Sources.sanitize(sqlContext.readSources(fallConfig.input)) val expectedOutputPaths = List( - "target/test/output/drug_purchases/data", "target/test/output/extract_patients/data", - "target/test/output/filter_patients/data", "target/test/output/exposures/data" + "target/test/output/drug_purchases/data", "target/test/output/extract_raw_patients/data", + "target/test/output/extract_filtered_patients/data", "target/test/output/filter_patients/data", + "target/test/output/exposures/data" ) val expectedOutputTypes = List("dispensations", "patients", "exposures")