diff --git a/.github/workflows/ruby-unit-tests.yml b/.github/workflows/ruby-unit-tests.yml index 1b1f040d0..d8eadf4f0 100644 --- a/.github/workflows/ruby-unit-tests.yml +++ b/.github/workflows/ruby-unit-tests.yml @@ -2,6 +2,10 @@ name: Ruby Unit Tests on: push: + branches: + - '**' + tags-ignore: + - '**' # ignore all tag pushes pull_request: jobs: @@ -18,7 +22,7 @@ jobs: - name: create config.rb file run: cp config/config.test.rb config/config.rb - name: Build docker compose - run: docker compose --profile 4store build #profile flag is set in order to build all containers in this step + run: docker compose --profile 4store build # profile flag is set in order to build all containers in this step - name: Run unit tests # unit tests are run inside a container # http://docs.codecov.io/docs/testing-with-docker @@ -26,7 +30,7 @@ jobs: ci_env=`bash <(curl -s https://codecov.io/env)` docker compose run $ci_env -e CI --rm ${{ matrix.backend }} bundle exec rake test TESTOPTS='-v' - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} flags: unittests diff --git a/Gemfile b/Gemfile index 11561bdb7..95c26c908 100644 --- a/Gemfile +++ b/Gemfile @@ -26,6 +26,8 @@ group :test do gem 'email_spec' gem 'minitest', '~> 4' gem 'minitest-reporters', '>= 0.5.0' + gem 'mocha', '~> 2.7' + gem 'mock_redis', '~> 0.5' gem 'pry' gem 'rack-test', '~> 0.6' gem 'simplecov' @@ -37,7 +39,7 @@ group :development do end # NCBO gems (can be from a local dev path or from rubygems/git) gem 'goo', github: 'ncbo/goo', branch: 'master' -gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'master' +gem 'sparql-client', github: 'ncbo/sparql-client', tag: 'v6.3.0' gem 'public_suffix', '~> 5.1.1' gem 'net-imap', '~> 0.4.18' diff --git a/Gemfile.lock b/Gemfile.lock index fd4e234dd..6c02c5e6c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ncbo/goo.git - 
revision: b9019ad9e1eb78c74105fc6c6a879085066da17d + revision: 26c0a69e67ac59778a46caf51202ebd38ccac767 branch: master specs: goo (0.0.2) @@ -16,8 +16,8 @@ GIT GIT remote: https://github.com/ncbo/sparql-client.git - revision: e89c26aa96f184dbe9b52d51e04fb3d9ba998dbc - branch: master + revision: 512edc320b43e83971835dc046b4923485e8f70e + tag: v6.3.0 specs: sparql-client (1.0.1) json_pure (>= 1.4) @@ -74,11 +74,11 @@ GEM launchy (>= 2.1, < 4.0) mail (~> 2.7) eventmachine (1.2.7) - faraday (2.13.1) + faraday (2.13.2) faraday-net_http (>= 2.0, < 3.5) json logger - faraday-net_http (3.4.0) + faraday-net_http (3.4.1) net-http (>= 0.5.0) ffi (1.17.2-aarch64-linux-gnu) ffi (1.17.2-arm64-darwin) @@ -90,7 +90,7 @@ GEM domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) - json (2.11.3) + json (2.12.2) json_pure (2.8.1) language_server-protocol (3.17.0.4) launchy (3.1.1) @@ -108,10 +108,10 @@ GEM net-pop net-smtp method_source (1.1.0) - mime-types (3.6.2) + mime-types (3.7.0) logger - mime-types-data (~> 3.2015) - mime-types-data (3.2025.0422) + mime-types-data (~> 3.2025, >= 3.2025.0507) + mime-types-data (3.2025.0708) mini_mime (1.1.5) minitest (4.7.5) minitest-reporters (0.14.24) @@ -119,6 +119,10 @@ GEM builder minitest (>= 2.12, < 5.0) powerbar + mocha (2.7.1) + ruby2_keywords (>= 0.0.5) + mock_redis (0.50.0) + redis (~> 5) multi_json (1.15.0) net-ftp (0.3.8) net-protocol @@ -156,7 +160,7 @@ GEM method_source (~> 1.0) public_suffix (5.1.1) racc (1.8.1) - rack (2.2.13) + rack (2.2.17) rack-test (0.8.3) rack (>= 1.0, < 3) rainbow (3.1.1) @@ -165,7 +169,7 @@ GEM addressable (>= 2.2) redis (5.4.0) redis-client (>= 0.22.0) - redis-client (0.24.0) + redis-client (0.25.1) connection_pool regexp_parser (2.10.0) request_store (1.7.0) @@ -194,6 +198,7 @@ GEM parser (>= 3.3.7.2) prism (~> 1.4) ruby-progressbar (1.13.0) + ruby2_keywords (0.0.5) rubyzip (1.3.0) simplecov (0.22.0) docile (~> 1.1) @@ -225,6 +230,7 @@ PLATFORMS aarch64-linux arm64-darwin-22 arm64-darwin-23 + 
arm64-darwin-24 x86_64-linux DEPENDENCIES @@ -238,6 +244,8 @@ DEPENDENCIES libxml-ruby minitest (~> 4) minitest-reporters (>= 0.5.0) + mocha (~> 2.7) + mock_redis (~> 0.5) multi_json (~> 1.0) net-imap (~> 0.4.18) oj (~> 3.0) diff --git a/bin/owlapi-wrapper-1.4.2.jar b/bin/owlapi-wrapper-1.5.0.jar old mode 100755 new mode 100644 similarity index 92% rename from bin/owlapi-wrapper-1.4.2.jar rename to bin/owlapi-wrapper-1.5.0.jar index 7dd3cc489..aae64a374 Binary files a/bin/owlapi-wrapper-1.4.2.jar and b/bin/owlapi-wrapper-1.5.0.jar differ diff --git a/config/schemes/ontology_submission.yml b/config/schemes/ontology_submission.yml index 750202048..6bc606291 100644 --- a/config/schemes/ontology_submission.yml +++ b/config/schemes/ontology_submission.yml @@ -56,7 +56,7 @@ version: "PAV: The version number of a resource.", "DOAP: A project release", "SCHEMA: The version of the CreativeWork embodied by a specified resource."] - extractedMetadata: true + extractedMetadata: false metadataMappings: [ "omv:version", "mod:version", "owl:versionInfo", "pav:version", "doap:release", "schema:version", "oboInOwl:data-version", "oboInOwl:version" ] #Status diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb index 229b46301..ad446a1d9 100644 --- a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb +++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb @@ -3,21 +3,43 @@ module Concerns module OntologySubmission module MetadataExtractor - def extract_metadata + def extract_metadata(logger = nil, heavy_extraction = true, user_params = nil) + logger ||= Logger.new(STDOUT) + logger.info('Extracting metadata from the ontology submission.') + + @submission = self version_info = extract_version ontology_iri = extract_ontology_iri + @submission.version = version_info if 
version_info + @submission.uri = ontology_iri if ontology_iri + @submission.save - self.version = version_info if version_info - self.uri = RDF::URI.new(ontology_iri) if ontology_iri + if heavy_extraction + begin + # Extract metadata directly from the ontology + extract_ontology_metadata(logger, user_params, skip_attrs: [:version, :uri]) + logger.info('Additional metadata extracted.') + rescue StandardError => e + e.backtrace + logger.error("Error while extracting additional metadata: #{e}") + end + end + if @submission.valid? + @submission.save + else + logger.error("Error while extracting additional metadata: #{@submission.errors}") + @submission = LinkedData::Models::OntologySubmission.find(@submission.id).first.bring_remaining + end end def extract_version + query = Goo.sparql_query_client.select(:versionInfo).distinct - .from(self.id) - .where([RDF::URI.new('http://bioportal.bioontology.org/ontologies/versionSubject'), - RDF::URI.new('http://www.w3.org/2002/07/owl#versionInfo'), - :versionInfo]) + .from(@submission.id) + .where([RDF::URI.new('http://bioportal.bioontology.org/ontologies/versionSubject'), + RDF::URI.new('http://www.w3.org/2002/07/owl#versionInfo'), + :versionInfo]) sol = query.each_solution.first || {} sol[:versionInfo]&.to_s @@ -25,12 +47,239 @@ def extract_version def extract_ontology_iri query = Goo.sparql_query_client.select(:uri).distinct - .from(self.id) + .from(@submission.id) .where([:uri, RDF::URI.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), RDF::URI.new('http://www.w3.org/2002/07/owl#Ontology')]) sol = query.each_solution.first || {} - sol[:uri]&.to_s + RDF::URI.new(sol[:uri]) if sol[:uri] + end + + # Extract additional metadata about the ontology + # First it extracts the main metadata, then the mapped metadata + def extract_ontology_metadata(logger, user_params, skip_attrs: []) + user_params = {} if user_params.nil? 
|| !user_params + ontology_uri = @submission.uri + logger.info("Extraction metadata from ontology #{ontology_uri}") + + # go through all OntologySubmission attributes. Returns symbols + LinkedData::Models::OntologySubmission.attributes(:all).each do |attr| + next if skip_attrs.include? attr + # for attribute with the :extractedMetadata setting on, and that have not been defined by the user + attr_settings = LinkedData::Models::OntologySubmission.attribute_settings(attr) + + attr_not_excluded = user_params && !(user_params.key?(attr) && !user_params[attr].nil? && !user_params[attr].empty?) + + next unless attr_settings[:extractedMetadata] && attr_not_excluded + + # a boolean to check if a value that should be single have already been extracted + single_extracted = false + type = enforce?(attr, :list) ? :list : :string + old_value = value(attr, type) + + unless attr_settings[:namespace].nil? + property_to_extract = "#{attr_settings[:namespace].to_s}:#{attr.to_s}" + hash_results = extract_each_metadata(ontology_uri, attr, property_to_extract, logger) + single_extracted = send_value(attr, hash_results, logger) unless hash_results.empty? + end + + # extracts attribute value from metadata mappings + attr_settings[:metadataMappings] ||= [] + + attr_settings[:metadataMappings].each do |mapping| + break if single_extracted + + hash_mapping_results = extract_each_metadata(ontology_uri, attr, mapping.to_s, logger) + single_extracted = send_value(attr, hash_mapping_results, logger) unless hash_mapping_results.empty? + end + + new_value = value(attr, type) + + send_value(attr, old_value, logger) if empty_value?(new_value) && !empty_value?(old_value) + end + end + + def empty_value?(value) + value.nil? || (value.is_a?(Array) && value.empty?) || value.to_s.strip.empty? + end + + def value(attr, type) + val = @submission.send(attr.to_s) + type.eql?(:list) ? 
Array(val) || [] : val || '' + end + + def send_value(attr, new_value, logger) + old_val = nil + single_extracted = false + + if enforce?(attr, :list) + old_val = value(attr, :list) + old_values = old_val.dup + new_values = new_value.values + new_values = new_values.map { |v| find_or_create_agent(attr, v, logger) }.compact if enforce?(attr, :Agent) + + old_values.push(*new_values) + + @submission.send("#{attr}=", old_values.uniq) + elsif enforce?(attr, :concatenate) + # if multiple value for this attribute, then we concatenate it + # Add the concat at the very end, to easily join the content of the array + old_val = value(attr, :string) + metadata_values = old_val.split(', ') + new_values = new_value.values.map { |x| x.to_s.split(', ') }.flatten + + @submission.send("#{attr}=", (metadata_values + new_values).uniq.join(', ')) + else + new_value = new_value.values.first + + new_value = find_or_create_agent(attr, nil, logger) if enforce?(attr, :Agent) + + @submission.send("#{attr}=", new_value) + single_extracted = true + end + + unless @submission.valid? + logger.error("Error while extracting metadata for the attribute #{attr}: #{@submission.errors[attr] || @submission.errors}") + new_value&.delete if enforce?(attr, :Agent) && new_value.respond_to?(:delete) + @submission.send("#{attr}=", old_val) + end + + single_extracted + end + + # Return a hash with the best literal value for an URI + # it selects the literal according to their language: no language > english > french > other languages + def select_metadata_literal(metadata_uri, metadata_literal, hash) + return unless metadata_literal.is_a?(RDF::Literal) + + if hash.key?(metadata_uri) + if metadata_literal.has_language? + if !hash[metadata_uri].has_language? 
+ return hash + else + case metadata_literal.language + when :en, :eng + # Take the value with english language over other languages + hash[metadata_uri] = metadata_literal + return hash + when :fr, :fre + # If no english, take french + if hash[metadata_uri].language == :en || hash[metadata_uri].language == :eng + return hash + else + hash[metadata_uri] = metadata_literal + return hash + end + else + return hash + end + end + else + # Take the value with no language in priority (considered as a default) + hash[metadata_uri] = metadata_literal + return hash + end + else + hash[metadata_uri] = metadata_literal + hash + end + end + + # A function to extract additional metadata + # Take the literal data if the property is pointing to a literal + # If pointing to an URI: first it takes the "omv:name" of the object pointed by the property, if nil it takes the "rdfs:label". + # If not found it check for "omv:firstName + omv:lastName" (for "omv:Person") of this object. And to finish it takes the "URI" + # The hash_results contains the metadataUri (objet pointed on by the metadata property) with the value we are using from it + def extract_each_metadata(ontology_uri, attr, prop_to_extract, logger) + + query_metadata = < #{prop_to_extract} ?extractedObject . + OPTIONAL { ?extractedObject omv:name ?omvname } . + OPTIONAL { ?extractedObject omv:firstName ?omvfirstname } . + OPTIONAL { ?extractedObject omv:lastName ?omvlastname } . + OPTIONAL { ?extractedObject rdfs:label ?rdfslabel } . 
+} +eos + Goo.namespaces.each do |prefix, uri| + query_metadata = "PREFIX #{prefix}: <#{uri}>\n" + query_metadata + end + + # logger.info(query_metadata) + # This hash will contain the "literal" metadata for each object (uri or literal) pointed by the metadata predicate + hash_results = {} + Goo.sparql_query_client.query(query_metadata).each_solution do |sol| + value = sol[:extractedObject] + if enforce?(attr, :uri) + # If the attr is enforced as URI then it directly takes the URI + uri_value = value ? RDF::URI.new(value.to_s.strip) : nil + hash_results[value] = uri_value if uri_value&.valid? + elsif enforce?(attr, :date_time) + begin + hash_results[value] = DateTime.iso8601(value.to_s) + rescue StandardError => e + logger.error("Impossible to extract DateTime metadata for #{attr}: #{value}. It should follow iso8601 standards. Error message: #{e}") + end + elsif enforce?(attr, :integer) + begin + hash_results[value] = value.to_s.to_i + rescue StandardError => e + logger.error("Impossible to extract integer metadata for #{attr}: #{value}. Error message: #{e}") + end + elsif enforce?(attr, :boolean) + case value.to_s.downcase + when 'true' + hash_results[value] = true + when 'false' + hash_results[value] = false + else + logger.error("Impossible to extract boolean metadata for #{attr}: #{value}. Error message: #{e}") + end + elsif value.is_a?(RDF::URI) + hash_results = find_object_label(hash_results, sol, value) + else + # If this is directly a literal + hash_results = select_metadata_literal(value, value, hash_results) + end + end + hash_results + end + + def find_object_label(hash_results, sol, value) + if !sol[:omvname].nil? + hash_results = select_metadata_literal(value, sol[:omvname], hash_results) + elsif !sol[:rdfslabel].nil? + hash_results = select_metadata_literal(value, sol[:rdfslabel], hash_results) + elsif !sol[:omvfirstname].nil? 
+ hash_results = select_metadata_literal(value, sol[:omvfirstname], hash_results) + # if first and last name are defined (for omv:Person) + hash_results[value] = "#{hash_results[value]} #{sol[:omvlastname]}" unless sol[:omvlastname].nil? + elsif !sol[:omvlastname].nil? + # if only last name is defined + hash_results = select_metadata_literal(value, sol[:omvlastname], hash_results) + else + # if the object is an URI but we are requesting a String + hash_results[value] = value.to_s + end + hash_results + end + + def enforce?(attr, type) + LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(type) + end + + def find_or_create_agent(attr, old_val, logger) + agent = LinkedData::Models::Agent.where(agentType: 'person', name: old_val).first + begin + agent ||= LinkedData::Models::Agent.new(name: old_val, agentType: 'person', creator: @submission.ontology.administeredBy.first).save + rescue + logger.error("Error while extracting metadata for the attribute #{attr}: Can't create Agent #{agent.errors} ") + agent = nil + end + agent end end end diff --git a/lib/ontologies_linked_data/config/config.rb b/lib/ontologies_linked_data/config/config.rb index 482cb7995..00e5ee118 100644 --- a/lib/ontologies_linked_data/config/config.rb +++ b/lib/ontologies_linked_data/config/config.rb @@ -51,8 +51,9 @@ def config(&block) @settings.goo_redis_port ||= 6379 # Ontology Analytics Redis - @settings.ontology_analytics_redis_host ||= 'localhost' - @settings.ontology_analytics_redis_port ||= 6379 + @settings.ontology_analytics_redis_host ||= 'localhost' + @settings.ontology_analytics_redis_port ||= 6379 + @settings.ontology_analytics_redis_field ||= 'cloudflare_analytics' # PURL server config parameters @settings.enable_purl ||= false diff --git a/lib/ontologies_linked_data/mappings/mappings.rb b/lib/ontologies_linked_data/mappings/mappings.rb index 10abc75e1..be43e9454 100644 --- a/lib/ontologies_linked_data/mappings/mappings.rb +++ 
b/lib/ontologies_linked_data/mappings/mappings.rb @@ -5,21 +5,32 @@ module LinkedData module Mappings OUTSTANDING_LIMIT = 30 - def self.mapping_predicates() - predicates = {} - predicates["CUI"] = ["http://bioportal.bioontology.org/ontologies/umls/cui"] - predicates["SAME_URI"] = - ["http://data.bioontology.org/metadata/def/mappingSameURI"] - predicates["LOOM"] = - ["http://data.bioontology.org/metadata/def/mappingLoom"] - predicates["REST"] = - ["http://data.bioontology.org/metadata/def/mappingRest"] - return predicates - end + def self.mapping_predicates + predicates = {} + predicates["CUI"] = ["http://bioportal.bioontology.org/ontologies/umls/cui"] + predicates["SAME_URI"] = + ["http://data.bioontology.org/metadata/def/mappingSameURI"] + predicates["LOOM"] = + ["http://data.bioontology.org/metadata/def/mappingLoom"] + predicates["REST"] = + ["http://data.bioontology.org/metadata/def/mappingRest"] + return predicates + end - def self.handle_triple_store_downtime(logger=nil) - epr = Goo.sparql_query_client(:main) - status = epr.status + def self.internal_mapping_predicates + predicates = {} + predicates["SKOS:EXACT_MATCH"] = ["http://www.w3.org/2004/02/skos/core#exactMatch"] + predicates["SKOS:CLOSE_MATCH"] = ["http://www.w3.org/2004/02/skos/core#closeMatch"] + predicates["SKOS:BROAD_MATH"] = ["http://www.w3.org/2004/02/skos/core#broadMatch"] + predicates["SKOS:NARROW_MATH"] = ["http://www.w3.org/2004/02/skos/core#narrowMatch"] + predicates["SKOS:RELATED_MATH"] = ["http://www.w3.org/2004/02/skos/core#relatedMatch"] + + return predicates + end + + def self.handle_triple_store_downtime(logger = nil) + epr = Goo.sparql_query_client(:main) + status = epr.status if status[:exception] logger.info(status[:exception]) if logger @@ -145,142 +156,59 @@ def self.empty_page(page,size) return p end - def self.mappings_ontologies(sub1,sub2,page,size,classId=nil,reload_cache=false) - union_template = <<-eos -{ - GRAPH <#{sub1.id.to_s}> { - classId ?o . 
- } - GRAPH graph { - ?s2 ?o . - } - bind -} -eos - blocks = [] - mappings = [] - persistent_count = 0 - acr1 = sub1.id.to_s.split("/")[-3] - - if classId.nil? - acr2 = nil - acr2 = sub2.id.to_s.split("/")[-3] unless sub2.nil? - pcount = LinkedData::Models::MappingCount.where(ontologies: acr1) - pcount = pcount.and(ontologies: acr2) unless acr2.nil? - f = Goo::Filter.new(:pair_count) == (not acr2.nil?) - pcount = pcount.filter(f) - pcount = pcount.include(:count) - pcount_arr = pcount.all - persistent_count = pcount_arr.length == 0 ? 0 : pcount_arr.first.count - - return LinkedData::Mappings.empty_page(page,size) if persistent_count == 0 - end + def self.mappings_ontologies(sub1, sub2, page, size, classId = nil, reload_cache = false) + sub1, acr1 = extract_acronym(sub1) + sub2, acr2 = extract_acronym(sub2) - if classId.nil? - union_template = union_template.gsub("classId", "?s1") - else - union_template = union_template.gsub("classId", "<#{classId.to_s}>") - end - # latest_sub_ids = self.retrieve_latest_submission_ids - - mapping_predicates().each do |_source,mapping_predicate| - union_block = union_template.gsub("predicate", mapping_predicate[0]) - union_block = union_block.gsub("bind","BIND ('#{_source}' AS ?source)") + mappings = [] + persistent_count = 0 - if sub2.nil? - union_block = union_block.gsub("graph","?g") - else - union_block = union_block.gsub("graph","<#{sub2.id.to_s}>") + if classId.nil? + persistent_count = count_mappings(acr1, acr2) + return LinkedData::Mappings.empty_page(page, size) if persistent_count == 0 end - blocks << union_block - end - unions = blocks.join("\nUNION\n") - mappings_in_ontology = <<-eos -SELECT DISTINCT query_variables -WHERE { -unions -filter -} page_group -eos - query = mappings_in_ontology.gsub("unions", unions) - variables = "?s2 graph ?source ?o" - variables = "?s1 " + variables if classId.nil? - query = query.gsub("query_variables", variables) - filter = classId.nil? ? 
"FILTER ((?s1 != ?s2) || (?source = 'SAME_URI'))" : '' + query = mappings_ont_build_query(classId, page, size, sub1, sub2) + epr = Goo.sparql_query_client(:main) + graphs = [sub1] + unless sub2.nil? + graphs << sub2 + end + solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) + s1 = nil + s1 = RDF::URI.new(classId.to_s) unless classId.nil? + + solutions.each do |sol| + graph2 = sub2.nil? ? sol[:g] : sub2 + s1 = sol[:s1] if classId.nil? + backup_mapping = nil + + if sol[:source].to_s == "REST" + backup_mapping = LinkedData::Models::RestBackupMapping + .find(sol[:o]).include(:process, :class_urns).first + backup_mapping.process.bring_remaining + end - if sub2.nil? - query = query.gsub("graph","?g") - ont_id = sub1.id.to_s.split("/")[0..-3].join("/") + classes = get_mapping_classes_instance(s1, sub1, sol[:s2], graph2) - # latest_sub_filter_arr = latest_sub_ids.map { |_, id| "?g = <#{id}>" } - # filter += "\nFILTER (#{latest_sub_filter_arr.join(' || ')}) " + mapping = if backup_mapping.nil? + LinkedData::Models::Mapping.new(classes, sol[:source].to_s) + else + LinkedData::Models::Mapping.new( + classes, sol[:source].to_s, + backup_mapping.process, backup_mapping.id) + end - #STRSTARTS is used to not count older graphs - #no need since now we delete older graphs - filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}'))" - else - query = query.gsub("graph", "") - end - query = query.gsub("filter", filter) - - if size > 0 - pagination = "OFFSET offset LIMIT limit" - query = query.gsub("page_group",pagination) - limit = size - offset = (page-1) * size - query = query.gsub("limit", "#{limit}").gsub("offset", "#{offset}") - else - query = query.gsub("page_group","") - end - epr = Goo.sparql_query_client(:main) - graphs = [sub1.id] - unless sub2.nil? - graphs << sub2.id - end - solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) - s1 = nil - unless classId.nil? 
- s1 = RDF::URI.new(classId.to_s) - end - solutions.each do |sol| - graph2 = nil - if sub2.nil? - graph2 = sol[:g] - else - graph2 = sub2.id + mappings << mapping end - if classId.nil? - s1 = sol[:s1] - end - classes = [ read_only_class(s1.to_s,sub1.id.to_s), - read_only_class(sol[:s2].to_s,graph2.to_s) ] - backup_mapping = nil - mapping = nil - if sol[:source].to_s == "REST" - backup_mapping = LinkedData::Models::RestBackupMapping - .find(sol[:o]).include(:process).first - backup_mapping.process.bring_remaining + if size == 0 + return mappings end - if backup_mapping.nil? - mapping = LinkedData::Models::Mapping.new( - classes,sol[:source].to_s) - else - mapping = LinkedData::Models::Mapping.new( - classes,sol[:source].to_s, - backup_mapping.process,backup_mapping.id) - end - mappings << mapping - end - if size == 0 - return mappings + page = Goo::Base::Page.new(page, size, persistent_count, mappings) + return page end - page = Goo::Base::Page.new(page,size,nil,mappings) - page.aggregate = persistent_count - return page - end def self.mappings_ontology(sub,page,size,classId=nil,reload_cache=false) return self.mappings_ontologies(sub,nil,page,size,classId=classId, @@ -383,18 +311,18 @@ def self.get_rest_mapping(mapping_id) FILTER(?uuid = <#{LinkedData::Models::Base.replace_url_prefix_to_id(mapping_id)}>) FILTER(?s1 != ?s2) } LIMIT 1 -eos - epr = Goo.sparql_query_client(:main) - graphs = [LinkedData::Models::MappingProcess.type_uri] - mapping = nil - epr.query(qmappings, - graphs: graphs).each do |sol| - classes = [ read_only_class(sol[:c1].to_s,sol[:s1].to_s), - read_only_class(sol[:c2].to_s,sol[:s2].to_s) ] - process = LinkedData::Models::MappingProcess.find(sol[:o]).first - mapping = LinkedData::Models::Mapping.new(classes,"REST", - process, - sol[:uuid]) + eos + epr = Goo.sparql_query_client(:main) + graphs = [LinkedData::Models::MappingProcess.type_uri] + mapping = nil + epr.query(qmappings, + graphs: graphs).each do |sol| + classes = 
[read_only_class(sol[:c1].to_s, sol[:s1].to_s), + read_only_class(sol[:c2].to_s, sol[:s2].to_s)] + process = LinkedData::Models::MappingProcess.find(sol[:o]).first + mapping = LinkedData::Models::Mapping.new(classes, 'REST', + process, + sol[:uuid]) end return mapping end @@ -437,7 +365,7 @@ def self.create_rest_mapping(classes,process) graph_insert << [c.id, RDF::URI.new(rest_predicate), backup_mapping.id] Goo.sparql_update_client.insert_data(graph_insert, graph: sub.id) end - mapping = LinkedData::Models::Mapping.new(classes,"REST",process, backup_mapping.id) + mapping = LinkedData::Models::Mapping.new(classes,"REST", process, backup_mapping.id) return mapping end @@ -773,5 +701,115 @@ def self.create_mapping_count_pairs_for_ontologies(logger, arr_acronyms) # fsave.close end + private + + def self.get_mapping_classes_instance(s1, graph1, s2, graph2) + [read_only_class(s1.to_s, graph1.to_s), + read_only_class(s2.to_s, graph2.to_s)] + end + + def self.mappings_ont_build_query(class_id, page, size, sub1, sub2) + blocks = [] + mapping_predicates.each do |_source, mapping_predicate| + blocks << mappings_union_template(class_id, sub1, sub2, + mapping_predicate[0], + "BIND ('#{_source}' AS ?source)") + end + + + + + + + filter = class_id.nil? ? "FILTER ((?s1 != ?s2) || (?source = 'SAME_URI'))" : '' + if sub2.nil? + + class_id_subject = class_id.nil? ? '?s1' : "<#{class_id.to_s}>" + source_graph = sub1.nil? ? '?g' : "<#{sub1.to_s}>" + internal_mapping_predicates.each do |_source, predicate| + blocks << <<-eos + { + GRAPH #{source_graph} { + #{class_id_subject} <#{predicate[0]}> ?s2 . 
+ } + BIND( AS ?g) + BIND(?s2 AS ?o) + BIND ('#{_source}' AS ?source) + } + eos + end + + ont_id = sub1.to_s.split("/")[0..-3].join("/") + #STRSTARTS is used to not count older graphs + #no need since now we delete older graphs + + filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}')" + filter += " || " + internal_mapping_predicates.keys.map{|x| "(?source = '#{x}')"}.join('||') + filter += ")" + end + + variables = "?s2 #{sub2.nil? ? '?g' : ''} ?source ?o" + variables = "?s1 " + variables if class_id.nil? + + pagination = '' + if size > 0 + limit = size + offset = (page - 1) * size + pagination = "OFFSET #{offset} LIMIT #{limit}" + end + + query = <<-eos +SELECT DISTINCT #{variables} +WHERE { + #{blocks.join("\nUNION\n")} + #{filter} +} #{pagination} + eos + + query + end + + def self.mappings_union_template(class_id, sub1, sub2, predicate, bind) + class_id_subject = class_id.nil? ? '?s1' : "<#{class_id.to_s}>" + target_graph = sub2.nil? ? '?g' : "<#{sub2.to_s}>" + union_template = <<-eos +{ + GRAPH <#{sub1.to_s}> { + #{class_id_subject} <#{predicate}> ?o . + } + GRAPH #{target_graph} { + ?s2 <#{predicate}> ?o . + } + #{bind} +} + eos + end + + def self.count_mappings(acr1, acr2) + count = LinkedData::Models::MappingCount.where(ontologies: acr1) + count = count.and(ontologies: acr2) unless acr2.nil? + f = Goo::Filter.new(:pair_count) == (not acr2.nil?) + count = count.filter(f) + count = count.include(:count) + pcount_arr = count.all + pcount_arr.length == 0 ? 0 : pcount_arr.first.count + end + + def self.extract_acronym(submission) + sub = submission + if submission.nil? 
+ acr = nil + elsif submission.respond_to?(:id) + # Case where sub2 is a Submission + sub = submission.id + acr = sub.to_s.split("/")[-3] + else + acr = sub.to_s + end + + return sub, acr + end + + end end -end + diff --git a/lib/ontologies_linked_data/models/category.rb b/lib/ontologies_linked_data/models/category.rb index 09b10f870..281e2dcb4 100644 --- a/lib/ontologies_linked_data/models/category.rb +++ b/lib/ontologies_linked_data/models/category.rb @@ -3,8 +3,8 @@ module Models class Category < LinkedData::Models::Base model :category, name_with: :acronym attribute :acronym, enforce: [:unique, :existence] - attribute :name, enforce: [:existence] - attribute :description + attribute :name, enforce: [:existence, :safe_text_64] + attribute :description, enforce: [:safe_text_64] attribute :created, enforce: [:date_time], default: lambda { |record| DateTime.now } attribute :parentCategory, enforce: [:category] attribute :ontologies, inverse: { on: :ontology, attribute: :hasDomain } diff --git a/lib/ontologies_linked_data/models/contact.rb b/lib/ontologies_linked_data/models/contact.rb index 9af31a952..2306135dc 100644 --- a/lib/ontologies_linked_data/models/contact.rb +++ b/lib/ontologies_linked_data/models/contact.rb @@ -2,8 +2,8 @@ module LinkedData module Models class Contact < LinkedData::Models::Base model :contact, name_with: lambda { |c| uuid_uri_generator(c) } - attribute :name, enforce: [:existence] - attribute :email, enforce: [:existence] + attribute :name, enforce: [:existence, :safe_text_128] + attribute :email, enforce: [:existence, :email] embedded true end diff --git a/lib/ontologies_linked_data/models/notes/note.rb b/lib/ontologies_linked_data/models/notes/note.rb index 01ec2dda5..27f39a0a9 100644 --- a/lib/ontologies_linked_data/models/notes/note.rb +++ b/lib/ontologies_linked_data/models/notes/note.rb @@ -6,8 +6,8 @@ module LinkedData module Models class Note < LinkedData::Models::Base model :note, name_with: lambda { |inst| 
uuid_uri_generator(inst) } - attribute :subject - attribute :body + attribute :subject, enforce: [:safe_text_64] + attribute :body, enforce: [:safe_text] attribute :creator, enforce: [:existence, :user] attribute :created, enforce: [:date_time], :default => lambda { |record| DateTime.now } attribute :archived, enforce: [:boolean] diff --git a/lib/ontologies_linked_data/models/notes/proposal.rb b/lib/ontologies_linked_data/models/notes/proposal.rb index 07d7d6b2d..59697a554 100644 --- a/lib/ontologies_linked_data/models/notes/proposal.rb +++ b/lib/ontologies_linked_data/models/notes/proposal.rb @@ -14,8 +14,8 @@ class Proposal < LinkedData::Models::Base model :base, name_with: lambda { |inst| uuid_uri_generator(inst) } attribute :type, enforce: [LinkedData::Models::Notes::ProposalType, :existence] - attribute :contactInfo - attribute :reasonForChange, enforce: [:existence] + attribute :contactInfo, enforce: [:safe_text_256] + attribute :reasonForChange, enforce: [:existence, :safe_text_256] # ProposalChangeHierarchy attribute :newTarget, enforce: [lambda {|inst, attr| existence(inst, attr, "ProposalChangeHierarchy")}] diff --git a/lib/ontologies_linked_data/models/ontology.rb b/lib/ontologies_linked_data/models/ontology.rb index 03cf6b5ca..9cc53cf37 100644 --- a/lib/ontologies_linked_data/models/ontology.rb +++ b/lib/ontologies_linked_data/models/ontology.rb @@ -15,7 +15,6 @@ class Ontology < LinkedData::Models::Base class ParsedSubmissionError < StandardError; end class OntologyAnalyticsError < StandardError; end - ONTOLOGY_ANALYTICS_REDIS_FIELD = "ontology_analytics" ONTOLOGY_RANK_REDIS_FIELD = "ontology_rank" DEFAULT_RANK_WEIGHT_ANALYTICS = 0.50 DEFAULT_RANK_WEIGHT_UMLS = 0.50 @@ -23,7 +22,7 @@ class OntologyAnalyticsError < StandardError; end model :ontology, :name_with => :acronym attribute :acronym, namespace: :omv, enforce: [:unique, :existence, lambda { |inst,attr| validate_acronym(inst,attr) } ] - attribute :name, :namespace => :omv, enforce: [:unique, 
:existence] + attribute :name, :namespace => :omv, enforce: [:unique, :existence, :safe_text_256] attribute :submissions, inverse: { on: :ontology_submission, attribute: :ontology } attribute :projects, @@ -309,7 +308,8 @@ def self.rank(weight_analytics=DEFAULT_RANK_WEIGHT_ANALYTICS, weight_umls=DEFAUL end def self.load_analytics_data - self.load_data(ONTOLOGY_ANALYTICS_REDIS_FIELD) + redis_field = LinkedData.settings.ontology_analytics_redis_field + self.load_data(redis_field) end def self.load_ranking_data diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index b19a18534..25a31a87e 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -245,6 +245,7 @@ def synchronize(&block) def URI=(value) self.uri = value end + def URI self.uri end @@ -303,13 +304,18 @@ def self.copy_file_repository(acronym, submission_id, src, filename = nil) # repository files are also accessible by the service group as intended, # we explicitly chmod the destination file to REPOSITORY_FILE_MODE. FileUtils.chmod(REPOSITORY_FILE_MODE, dst) - - raise "Unable to copy #{src} to #{dst}" unless File.exist?(dst) - - dst rescue StandardError => e - raise "Failed to copy #{src} to #{dst}: [#{e.class}] #{e.message}" + raise e.class, "Failed to copy #{src} to #{dst}: #{e.message}", e.backtrace end + + # Sanity check: ensure the file actually exists after copy and chmod + # This guards against rare cases like silent file storage failures or + # race conditions + unless File.exist?(dst) + raise IOError, "Copy operation completed without error, but file '#{dst}' does not exist" + end + + dst end def valid? 
diff --git a/lib/ontologies_linked_data/models/project.rb b/lib/ontologies_linked_data/models/project.rb index 5026dce26..55df92982 100644 --- a/lib/ontologies_linked_data/models/project.rb +++ b/lib/ontologies_linked_data/models/project.rb @@ -6,11 +6,11 @@ class Project < LinkedData::Models::Base attribute :creator, enforce: [:existence, :user, :list] attribute :created, enforce: [:date_time], :default => lambda {|x| DateTime.now } attribute :updated, enforce: [:date_time], :default => lambda {|x| DateTime.now } - attribute :name, enforce: [:existence] + attribute :name, enforce: [:existence, :safe_text_256] attribute :homePage, enforce: [:uri, :existence] - attribute :description, enforce: [:existence] - attribute :contacts - attribute :institution + attribute :description, enforce: [:existence, :safe_text] + attribute :contacts, enforce: [:safe_text_256] + attribute :institution, enforce: [:safe_text_256] attribute :ontologyUsed, enforce: [:ontology, :list] end end diff --git a/lib/ontologies_linked_data/models/users/user.rb b/lib/ontologies_linked_data/models/users/user.rb index 6027e3c91..d00b2ad97 100644 --- a/lib/ontologies_linked_data/models/users/user.rb +++ b/lib/ontologies_linked_data/models/users/user.rb @@ -13,11 +13,11 @@ class User < LinkedData::Models::Base attr_accessor :show_apikey model :user, name_with: :username - attribute :username, enforce: [:unique, :existence] - attribute :email, enforce: [:existence] + attribute :username, enforce: [:unique, :existence, :safe_text_56] + attribute :email, enforce: [:existence, :email] attribute :role, enforce: [:role, :list], :default => lambda {|x| [LinkedData::Models::Users::Role.default]} - attribute :firstName - attribute :lastName + attribute :firstName, enforce: [:safe_text_128] + attribute :lastName, enforce: [:safe_text_128] attribute :githubId, enforce: [:unique] attribute :orcidId, enforce: [:unique] attribute :created, enforce: [:date_time], :default => lambda { |record| DateTime.now } diff 
--git a/lib/ontologies_linked_data/parser/owlapi.rb b/lib/ontologies_linked_data/parser/owlapi.rb index 1a83239d7..ab3016674 100644 --- a/lib/ontologies_linked_data/parser/owlapi.rb +++ b/lib/ontologies_linked_data/parser/owlapi.rb @@ -13,7 +13,7 @@ class RDFFileNotGeneratedException < Parser::ParserException class OWLAPICommand def initialize(input_file, output_repo, opts = {}) - @owlapi_wrapper_jar_path = LinkedData.bindir + "/owlapi-wrapper-1.4.2.jar" + @owlapi_wrapper_jar_path = LinkedData.bindir + "/owlapi-wrapper-1.5.0.jar" @input_file = input_file @output_repo = output_repo @master_file = opts[:master_file] diff --git a/test/data/ontology_analytics_data.json b/test/data/ontology_analytics_data.json new file mode 100644 index 000000000..e2722ceee --- /dev/null +++ b/test/data/ontology_analytics_data.json @@ -0,0 +1,854 @@ +{ + "NCIT" : { + "2013" : { + "1" : 0, + "2" : 0, + "3" : 0, + "4" : 0, + "5" : 0, + "6" : 0, + "7" : 0, + "8" : 0, + "9" : 0, + "10" : 2850, + "11" : 1631, + "12" : 1323 + }, + "2014" : { + "1" : 1004, + "2" : 1302, + "3" : 2183, + "4" : 2191, + "5" : 1005, + "6" : 1046, + "7" : 1261, + "8" : 1329, + "9" : 1100, + "10" : 956, + "11" : 1105, + "12" : 893 + }, + "2015" : { + "1" : 840, + "2" : 30, + "3" : 50, + "4" : 20, + "5" : 30, + "6" : 10, + "7" : 100, + "8" : 80, + "9" : 20, + "10" : 90, + "11" : 200, + "12" : 50 + }, + "2016" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 520, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2017" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 220, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 170, + "11" : 750, + "12" : 730 + }, + "2018" : { + "1" : 2000, + "2" : 220, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 120, + "11" : 750, + "12" : 730 + }, + "2019" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" 
: 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2020" : { + "1" : 2000, + "2" : 210, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2021" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 550, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2022" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 990, + "7" : 340, + "8" : 320, + "9" : 610, + "10" : 180, + "11" : 750, + "12" : 730 + } + }, + "ONTOMA" : { + "2013" : { + "1" : 0, + "2" : 0, + "3" : 0, + "4" : 0, + "5" : 0, + "6" : 0, + "7" : 0, + "8" : 0, + "9" : 0, + "10" : 6, + "11" : 15, + "12" : 0 + }, + "2014" : { + "1" : 2, + "2" : 0, + "3" : 0, + "4" : 2, + "5" : 2, + "6" : 0, + "7" : 6, + "8" : 8, + "9" : 0, + "10" : 0, + "11" : 0, + "12" : 2 + }, + "2015" : { + "1" : 30, + "2" : 90, + "3" : 90, + "4" : 50, + "5" : 30, + "6" : 20, + "7" : 80, + "8" : 90, + "9" : 250, + "10" : 230, + "11" : 120, + "12" : 70 + }, + "2016" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 520, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2017" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 220, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 170, + "11" : 750, + "12" : 730 + }, + "2018" : { + "1" : 2000, + "2" : 220, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 120, + "11" : 750, + "12" : 730 + }, + "2019" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2020" : { + "1" : 2000, + "2" : 210, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2021" : { 
+ "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 550, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2022" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 990, + "7" : 340, + "8" : 320, + "9" : 610, + "10" : 180, + "11" : 750, + "12" : 730 + } + }, + "CMPO" : { + "2013" : { + "1" : 0, + "2" : 0, + "3" : 0, + "4" : 0, + "5" : 0, + "6" : 0, + "7" : 0, + "8" : 0, + "9" : 0, + "10" : 64, + "11" : 75, + "12" : 22 + }, + "2014" : { + "1" : 15, + "2" : 15, + "3" : 19, + "4" : 12, + "5" : 13, + "6" : 14, + "7" : 22, + "8" : 12, + "9" : 36, + "10" : 6, + "11" : 8, + "12" : 10 + }, + "2015" : { + "1" : 7, + "2" : 40, + "3" : 140, + "4" : 320, + "5" : 560, + "6" : 320, + "7" : 210, + "8" : 230, + "9" : 220, + "10" : 10, + "11" : 220, + "12" : 880 + }, + "2016" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 520, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2017" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 220, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 170, + "11" : 750, + "12" : 30 + }, + "2018" : { + "1" : 2000, + "2" : 220, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 120, + "11" : 750, + "12" : 430 + }, + "2019" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 830 + }, + "2020" : { + "1" : 2000, + "2" : 210, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 380 + }, + "2021" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 550, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 794 + }, + "2022" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 990, 
+ "7" : 340, + "8" : 320, + "9" : 610, + "10" : 180, + "11" : 750, + "12" : 738 + } + }, + "AEO" : { + "2013" : { + "1" : 0, + "2" : 0, + "3" : 0, + "4" : 0, + "5" : 0, + "6" : 0, + "7" : 0, + "8" : 0, + "9" : 0, + "10" : 129, + "11" : 142, + "12" : 70 + }, + "2014" : { + "1" : 116, + "2" : 93, + "3" : 85, + "4" : 132, + "5" : 96, + "6" : 137, + "7" : 69, + "8" : 158, + "9" : 123, + "10" : 221, + "11" : 163, + "12" : 43 + }, + "2015" : { + "1" : 25, + "2" : 230, + "3" : 330, + "4" : 220, + "5" : 650, + "6" : 320, + "7" : 840, + "8" : 440, + "9" : 220, + "10" : 110, + "11" : 210, + "12" : 270 + }, + "2016" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 520, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 335 + }, + "2017" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 220, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 170, + "11" : 750, + "12" : 732 + }, + "2018" : { + "1" : 2000, + "2" : 220, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 120, + "11" : 750, + "12" : 734 + }, + "2019" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 790 + }, + "2020" : { + "1" : 2000, + "2" : 210, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 350 + }, + "2021" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 550, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 730 + }, + "2022" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 990, + "7" : 340, + "8" : 320, + "9" : 610, + "10" : 180, + "11" : 750, + "12" : 730 + } + }, + "SNOMEDCT" : { + "2013" : { + "1" : 0, + "2" : 0, + "3" : 0, + "4" : 0, + "5" : 0, + "6" : 0, + "7" : 0, + "8" : 0, + "9" : 0, + "10" : 20721, + "11" 
: 22717, + "12" : 18565 + }, + "2014" : { + "1" : 17966, + "2" : 17212, + "3" : 20942, + "4" : 20376, + "5" : 21063, + "6" : 18734, + "7" : 18116, + "8" : 18676, + "9" : 15728, + "10" : 16348, + "11" : 13933, + "12" : 9533 + }, + "2015" : { + "1" : 9036, + "2" : 430, + "3" : 550, + "4" : 110, + "5" : 990, + "6" : 320, + "7" : 630, + "8" : 250, + "9" : 270, + "10" : 880, + "11" : 330, + "12" : 280 + }, + "2016" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 520, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 230 + }, + "2017" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 220, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 170, + "11" : 750, + "12" : 130 + }, + "2018" : { + "1" : 2000, + "2" : 220, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 120, + "11" : 750, + "12" : 330 + }, + "2019" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 920 + }, + "2020" : { + "1" : 2000, + "2" : 210, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 130 + }, + "2021" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 550, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 930 + }, + "2022" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 990, + "7" : 340, + "8" : 320, + "9" : 610, + "10" : 180, + "11" : 750, + "12" : 230 + } + }, + "TST" : { + "2013" : { + "1" : 0, + "2" : 0, + "3" : 23, + "4" : 0, + "5" : 0, + "6" : 0, + "7" : 20, + "8" : 0, + "9" : 0, + "10" : 234, + "11" : 7654, + "12" : 2311 + }, + "2014" : { + "1" : 39383, + "2" : 239, + "3" : 40273, + "4" : 3232, + "5" : 2, + "6" : 58734, + "7" : 11236, + "8" : 23, + "9" : 867, + "10" : 232, + "11" : 1111, + "12" : 8 + 
}, + "2015" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 710 + }, + "2016" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 520, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 720 + }, + "2017" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 220, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 170, + "11" : 750, + "12" : 73 + }, + "2018" : { + "1" : 2000, + "2" : 220, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 120, + "11" : 750, + "12" : 60 + }, + "2019" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 70 + }, + "2020" : { + "1" : 2000, + "2" : 210, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 790 + }, + "2021" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 550, + "5" : 270, + "6" : 980, + "7" : 340, + "8" : 320, + "9" : 630, + "10" : 110, + "11" : 750, + "12" : 70 + }, + "2022" : { + "1" : 2000, + "2" : 230, + "3" : 640, + "4" : 540, + "5" : 270, + "6" : 990, + "7" : 340, + "8" : 320, + "9" : 610, + "10" : 180, + "11" : 750, + "12" : 30 + } + } +} diff --git a/test/models/test_contact.rb b/test/models/test_contact.rb new file mode 100644 index 000000000..495a12b06 --- /dev/null +++ b/test/models/test_contact.rb @@ -0,0 +1,96 @@ +require_relative "../test_case" +class TestContact < LinkedData::TestCase + + def self.before_suite + self.new("before_suite").teardown + end + + def self.after_suite + self.new("after_suite").teardown + end + + def setup + @contact = LinkedData::Models::Contact.new({ + name: "Test Contact", + email: "test@example.com" + }) + end + + def teardown + # Clean up any test contacts + contacts = 
LinkedData::Models::Contact.where(name: "Test Contact").to_a + contacts.each { |c| c.delete } + end + + def test_contact_validation + contact = LinkedData::Models::Contact.new + refute contact.valid? + + contact.name = "Test Contact" + contact.email = "test@example.com" + assert contact.valid? + end + + def test_contact_lifecycle + contact = LinkedData::Models::Contact.new({ + name: "Lifecycle Test Contact", + email: "lifecycle@example.com" + }) + c = LinkedData::Models::Contact.where(email: contact.email).first + refute c + assert contact.valid? + contact.save + c = LinkedData::Models::Contact.where(email: contact.email).first + assert c + contact.delete + c = LinkedData::Models::Contact.where(email: contact.email).first + refute c + end + + def test_contact_missing_name + contact = LinkedData::Models::Contact.new({ + email: "test@example.com" + }) + refute contact.valid? + assert_includes contact.errors, :name + end + + def test_contact_missing_email + contact = LinkedData::Models::Contact.new({ + name: "Test Contact" + }) + refute contact.valid? + assert_includes contact.errors, :email + end + + def test_contact_invalid_email + contact = LinkedData::Models::Contact.new({ + name: "Test Contact", + email: "invalid-email" + }) + refute contact.valid? + assert_includes contact.errors, :email + end + + def test_contact_duplicate + skip "Duplicate contact prevention mechanism not yet implemented" + # Create and save first contact + contact1 = LinkedData::Models::Contact.new({ + name: "Duplicate Test Contact", + email: "duplicate@example.com" + }) + assert contact1.valid? + contact1.save + + # Try to create second contact with same name and email + contact2 = LinkedData::Models::Contact.new({ + name: "Duplicate Test Contact", + email: "duplicate@example.com" + }) + refute contact2.valid? 
+ assert_includes contact2.errors, :name + assert_includes contact2.errors, :email + contact1.delete + end + +end \ No newline at end of file diff --git a/test/models/test_mappings.rb b/test/models/test_mappings.rb index e53a6b80d..501baa18c 100644 --- a/test/models/test_mappings.rb +++ b/test/models/test_mappings.rb @@ -35,6 +35,11 @@ def self.ontologies_parse process_rdf: true, extract_metadata: false) end + def delete_all_rest_mappings + LinkedData::Models::RestBackupMapping.all.each do |m| + LinkedData::Mappings.delete_rest_mapping(m.id) + end + end def test_mapping_count_models LinkedData::Models::MappingCount.where.all(&:delete) diff --git a/test/models/test_ontology.rb b/test/models/test_ontology.rb index f0b4f3c64..68a038948 100644 --- a/test/models/test_ontology.rb +++ b/test/models/test_ontology.rb @@ -153,7 +153,7 @@ def test_ontology_properties ont.bring(:submissions) sub = ont.submissions[0] props = ont.properties() - assert_equal 85, props.length + assert_equal 86, props.length # verify sorting assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#AlgorithmPurpose", props[0].id.to_s @@ -192,7 +192,7 @@ def test_ontology_properties # test property roots pr = ont.property_roots(sub, extra_include=[:hasChildren, :children]) - assert_equal 64, pr.length + assert_equal 65, pr.length # verify sorting assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#AlgorithmPurpose", pr[0].id.to_s @@ -206,7 +206,7 @@ def test_ontology_properties assert_equal 33, dpr.length # count annotation properties apr = pr.select { |p| p.class == LinkedData::Models::AnnotationProperty } - assert_equal 13, apr.length + assert_equal 14, apr.length # check for non-root properties assert_empty pr.select { |p| ["http://www.w3.org/2004/02/skos/core#broaderTransitive", "http://www.w3.org/2004/02/skos/core#topConceptOf", @@ -288,6 +288,18 @@ def test_valid_ontology u = LinkedData::Models::User.new(username: "tim") o.administeredBy = 
[@user] assert o.valid? + + o.name = "This name has\u200Bhidden content" + refute o.valid? + + o.name = "This ontology price is < than $1!" + refute o.valid? + + o.name = "This ontology looks like 🌍" + refute o.valid? + + o.name = "Ontology for the Comprehensive Integration of Multimodal Biomedical Research Data Covering Genomics, Proteomics, Transcriptomics, Metabolomics, Pharmacogenomics, and Clinical Trial Metadata in Global Health and Disease Surveillance Systems for Pandemic Preparedness and Precision Medicine Applications" + refute o.valid? end def test_ontology_delete diff --git a/test/models/test_ontology_analytics.rb b/test/models/test_ontology_analytics.rb new file mode 100644 index 000000000..ad5659553 --- /dev/null +++ b/test/models/test_ontology_analytics.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require_relative '../test_case' +require 'mocha/minitest' +require 'mock_redis' + +class TestOntologyAnalytics < LinkedData::TestCase + def setup + super + LinkedData::Models::Ontology.class_variable_set(:@@redis, nil) + + LinkedData.settings.stubs(:ontology_analytics_redis_host).returns('localhost') + LinkedData.settings.stubs(:ontology_analytics_redis_port).returns('6379') + LinkedData.settings.stubs(:ontology_analytics_redis_field).returns('test_analytics') + + @mock_redis = MockRedis.new + Redis.stubs(:new).returns(@mock_redis) + + prepare_test_data + end + + def test_return_empty_hash_if_no_data_in_redis + @mock_redis.flushdb + result = LinkedData::Models::Ontology.analytics + assert_equal({}, result) + end + + def test_return_all_analytics + result = LinkedData::Models::Ontology.analytics + assert_equal @analytics, result + assert_equal 6, result.keys.length + end + + def test_return_analytics_for_specific_acronyms + acronyms = %w[NCIT CMPO AEO] + result = LinkedData::Models::Ontology.analytics(nil, nil, acronyms) + + assert_equal 3, result.size + assert_includes result.keys, 'NCIT' + assert_includes result.keys, 'CMPO' + assert_includes 
result.keys, 'AEO' + refute_includes result.keys, 'SNOMEDCT' + end + + def test_filter_analytics_by_year + result = LinkedData::Models::Ontology.analytics('2014') + + assert_equal 6, result.size + assert_equal @analytics['NCIT']['2014'], result['NCIT']['2014'] + assert_equal 17_212, result['SNOMEDCT']['2014']['2'] + assert_empty result['NCIT'].keys.map(&:to_s) & %w[2013 2015 2016 2017 2018 2019 2020 2021 2022] + end + + def test_filter_analytics_by_year_and_month_and_sort_results + result = LinkedData::Models::Ontology.analytics('2013', '10') + + # Expected order based on ontology_analytics_data.json for 2013-10: + # SNOMEDCT (20721), NCIT (2850), TST (234), AEO (129), CMPO (64), ONTOMA (6) + expected_keys = %w[SNOMEDCT NCIT TST AEO CMPO ONTOMA] + + assert_equal expected_keys, result.keys + assert_equal 20_721, result['SNOMEDCT']['2013']['10'] + assert_equal 6, result['ONTOMA']['2013']['10'] + end + + def test_retrieve_analytics_for_a_single_ontology + result = @snomed_ont.analytics + assert_equal @analytics['SNOMEDCT'], result['SNOMEDCT'] + assert_equal 1, result.size + end + + def test_retrieve_filtered_analytics_for_a_single_ontology + result = @ncit_ont.analytics('2014', '3') + assert_equal 2183, result['NCIT']['2014']['3'] + end + + def teardown + @mock_redis.flushdb + super + end + + private + + def prepare_test_data + @analytics = JSON.parse( + File.read(File.expand_path('../data/ontology_analytics_data.json', __dir__)) + ) + @mock_redis.set('test_analytics', Marshal.dump(@analytics)) + + @snomed_ont = LinkedData::Models::Ontology.new(acronym: 'SNOMEDCT') + @ncit_ont = LinkedData::Models::Ontology.new(acronym: 'NCIT') + end +end diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb index 79ae72fe4..2f01db990 100644 --- a/test/models/test_ontology_submission.rb +++ b/test/models/test_ontology_submission.rb @@ -289,13 +289,13 @@ def test_submission_parse unless ENV["BP_SKIP_HEAVY_TESTS"] == "1" 
submission_parse("MCCLTEST", "MCCLS TEST", "./test/data/ontology_files/CellLine_OWL_BioPortal_v1.0.owl", 11, - process_rdf: true, extract_metadata: false) + process_rdf: true, extract_metadata: true) sub = LinkedData::Models::OntologySubmission.where(ontology: [acronym: "MCCLTEST"], submissionId: 11) .include(:version) .first - assert sub.version == "3.0" + assert_equal "3.0", sub.version end #This one has resources wih accents. @@ -448,7 +448,7 @@ def test_index_properties "./test/data/ontology_files/BRO_v3.5.owl", 1, process_rdf: true, extract_metadata: false, index_properties: true) res = LinkedData::Models::Class.search("*:*", {:fq => "submissionAcronym:\"BRO\"", :start => 0, :rows => 80}, :property) - assert_equal 83 , res["response"]["numFound"] + assert_equal 84, res["response"]["numFound"] found = 0 res["response"]["docs"].each do |doc| @@ -1257,4 +1257,41 @@ def test_copy_file_repository_from_tempfile tmp.unlink end end + + # Tests metadata extraction when parsing a submission (we extract the submission attributes whose + # extractedMetadata setting is true) + def test_submission_extract_metadata + 2.times do |i| + submission_parse("AGROOE", "AGROOE Test extract metadata ontology", + "./test/data/ontology_files/agrooeMappings-05-05-2016.owl", i + 1, + process_rdf: true, extract_metadata: true, generate_missing_labels: false) + ont = LinkedData::Models::Ontology.find("AGROOE").first + sub = ont.latest_submission + refute_nil sub + + sub.bring_remaining + assert_equal false, sub.deprecated + assert_equal '2015-09-28', sub.creationDate.to_date.to_s + assert_equal '2015-10-01', sub.modificationDate.to_date.to_s + assert_equal "description example, AGROOE is an ontology used to test the metadata extraction, AGROOE is an ontology to illustrate how to describe their ontologies", sub.description + assert_equal [RDF::URI.new('http://agroportal.lirmm.fr')], sub.identifier + assert_equal ["http://lexvo.org/id/iso639-3/fra", 
"http://lexvo.org/id/iso639-3/eng"].sort, sub.naturalLanguage.sort + assert_equal [RDF::URI.new("http://lirmm.fr/2015/ontology/door-relation.owl"), RDF::URI.new("http://lirmm.fr/2015/ontology/dc-relation.owl"), + RDF::URI.new("http://lirmm.fr/2015/ontology/dcterms-relation.owl"), + RDF::URI.new("http://lirmm.fr/2015/ontology/voaf-relation.owl"), + RDF::URI.new("http://lirmm.fr/2015/ontology/void-import.owl") + ].sort, sub.ontologyRelatedTo.sort + + + + + # assert_equal ["Agence 007", "Éditions \"La Science en Marche\"", " LIRMM (default name) "].sort, sub.publisher.map { |x| x.bring_remaining.name }.sort + # assert_equal ["Alfred DC", "Clement Jonquet", "Gaston Dcterms", "Huguette Doap", "Mirabelle Prov", "Paul Foaf", "Vincent Emonet"].sort, sub.hasCreator.map { |x| x.bring_remaining.name }.sort + # assert_equal ["Léontine Dessaiterm", "Anne Toulet", "Benjamine Dessay", "Augustine Doap", "Vincent Emonet"].sort, sub.hasContributor.map { |x| x.bring_remaining.name }.sort + # assert_equal 1, LinkedData::Models::Agent.where(name: "Vincent Emonet").count + + sub.description = "test changed value" + sub.save + end + end end diff --git a/test/models/test_project.rb b/test/models/test_project.rb index 2483b1b4d..da38f84c8 100644 --- a/test/models/test_project.rb +++ b/test/models/test_project.rb @@ -189,6 +189,16 @@ def test_valid_project p.creator = @project_params[:creator] p.ontologyUsed = @project_params[:ontologyUsed] assert p.valid? + + p.name = "Global Collaborative Project for the Standardization, Harmonization, and Federated Analysis of Longitudinal Multi-Center Patient Health Records, Genomic Sequencing Data, and Real-World Evidence to Accelerate Discovery and Deployment of AI-Driven Clinical Decision Support Systems and Therapeutic Innovations" + refute p.valid? + + p.name = "Valid name" + p.contacts = "Evil contacts text\u202Eevil.com" + refute p.valid? + + p.contacts = "Good contacts" + assert p.valid? 
end def test_project_lifecycle diff --git a/test/models/user/test_user.rb b/test/models/user/test_user.rb index 958970a3e..2f123a159 100644 --- a/test/models/user/test_user.rb +++ b/test/models/user/test_user.rb @@ -28,21 +28,46 @@ def teardown def test_valid_user u = LinkedData::Models::User.new - assert (not u.valid?) + refute u.valid? u.username = "test_user1" u.email = "test@example.com" u.password = "a_password" assert u.valid? + + u.username = "really_really_really_really_really_really_really_long_username" + refute u.valid? + + u.username = 'username_with_🌍_character' + refute u.valid? + + u.username = "username_with\nnewline" + refute u.valid? + + u.username = "username_with\ttab" + refute u.valid? + + u.username = "username_<1>!" + refute u.valid? + + u.username = "username_with\u200Bhidden_char" + refute u.valid? + + # u.username = "test bad username" + # refute u.valid? + # u.username = "test.bad.username" + # refute u.valid? + # u.username = "" + # refute u.valid? end def test_user_lifecycle - assert_equal false, @u.exist?(reload=true) + refute @u.exist?(reload=true) assert @u.valid? @u.save - assert_equal true, @u.exist?(reload=true) + assert @u.exist?(reload=true) @u.delete - assert_equal false, @u.exist?(reload=true) + refute @u.exist?(reload=true) end def test_user_role_assign @@ -83,4 +108,82 @@ def test_user_default_uuid u.delete end + def test_user_email_validation_missing_email + u = LinkedData::Models::User.new({ + username: "test_user_no_email", + password: "a_password" + }) + refute u.valid? + assert u.errors.include?(:email) + end + + def test_user_email_validation_empty_email + u = LinkedData::Models::User.new({ + username: "test_user_empty_email", + email: "", + password: "a_password" + }) + refute u.valid? + assert u.errors.include?(:email) + end + + def test_user_email_validation_nil_email + u = LinkedData::Models::User.new({ + username: "test_user_nil_email", + email: nil, + password: "a_password" + }) + refute u.valid? 
+ assert u.errors.include?(:email) + end + + def test_user_email_validation_invalid_formats + invalid_emails = [ + "invalid-email", + "@example.com", + "test@", + "test@.com", + "test..test@example.com", + "test@example..com", + "test@example", + "test space@example.com", + "test@example com", + "test@example.com.", + ".test@example.com" + ] + + invalid_emails.each_with_index do |email, index| + u = LinkedData::Models::User.new({ + username: "test_user_invalid_email_#{index}", + email: email, + password: "a_password" + }) + refute u.valid?, "Email '#{email}' should be invalid" + assert u.errors.include?(:email), "Email '#{email}' should have email validation error" + end + end + + def test_user_email_validation_valid_formats + valid_emails = [ + "test@example.com", + "user.name@example.com", + "user+tag@example.com", + "user@subdomain.example.com", + "user@example.co.uk", + "user@example-domain.com", + "user123@example.com", + "user-name@example.com" + ] + + valid_emails.each_with_index do |email, index| + u = LinkedData::Models::User.new({ + username: "test_user_valid_email_#{index}", + email: email, + password: "a_password" + }) + assert u.valid?, "Email '#{email}' should be valid" + refute u.errors.include?(:email), "Email '#{email}' should not have email validation error" + end + end + end