Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions bothub/api/v2/evaluate/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ class Meta:
help_text=_("Filter for repository cross_validation results."),
)

type = filters.CharFilter(
field_name="type",

@elitonzky elitonzky May 25, 2023

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this line contains trailing whitespace in:
field_name="type",
for more details: https://www.flake8rules.com/rules/W291.html

method="filter_evaluate_type",
help_text=_("Filter by evaluate type")
)

def filter_repository_uuid(self, queryset, name, value):
request = self.request
try:
Expand Down Expand Up @@ -139,6 +145,9 @@ def filter_repository_version(self, queryset, name, value):

# Filter callback: keeps only the rows of ``queryset`` whose
# ``cross_validation`` field equals ``value``. ``name`` is part of the
# callback signature but is not used here.
def filter_repository_cross_validation(self, queryset, name, value):
return queryset.filter(cross_validation=value)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this line contains whitespace:

for more details: https://www.flake8rules.com/rules/W293.html

# Filter callback: keeps only the rows of ``queryset`` whose ``evaluate_type``
# field equals ``value``. ``name`` is accepted but not used.
# NOTE(review): the filter that references this method is declared with
# field_name="type", while the lookup below uses ``evaluate_type`` — confirm
# the declared field name is intentional.
def filter_evaluate_type(self, queryset, name, value):
return queryset.filter(evaluate_type=value)


class EvaluateResultFilter(filters.FilterSet):
Expand Down
50 changes: 49 additions & 1 deletion bothub/api/v2/evaluate/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,18 @@ def update(self, instance, validated_data):
class RepositoryEvaluateResultVersionsSerializer(serializers.ModelSerializer):
    """Serialize an evaluate result as a compact "version" entry.

    Besides the model fields listed in ``Meta.fields``, two computed values
    are exposed: ``language`` (read from ``repository_version_language``) and
    ``accuracy`` (read from ``intent_results``).
    """

    class Meta:
        model = RepositoryEvaluateResult
        # Declared exactly once: a second assignment to ``fields`` in the same
        # class body would silently replace the first one.
        fields = [
            "id",
            "language",
            "created_at",
            "version",
            "cross_validation",
            "accuracy",
        ]
        ref_name = None

    language = serializers.SerializerMethodField()
    accuracy = serializers.SerializerMethodField()

    def get_language(self, obj):
        """Return ``obj.repository_version_language.language``."""
        return obj.repository_version_language.language

    def get_accuracy(self, obj):
        """Return ``obj.intent_results.accuracy``."""
        return obj.intent_results.accuracy


class RepositoryEvaluateResultScore(serializers.ModelSerializer):
class Meta:
Expand Down Expand Up @@ -192,6 +196,11 @@ class Meta:
"intent_results",
"entity_results",
"cross_validation",
"accuracy",
"evaluate_type",
"qualitity",
"recommendations",
"count_logs"
]
ref_name = None

Expand All @@ -201,6 +210,11 @@ class Meta:
repository_version = serializers.SerializerMethodField()
intent_results = RepositoryEvaluateResultScore(read_only=True)
entity_results = RepositoryEvaluateResultScore(read_only=True)
accuracy = serializers.SerializerMethodField()
evaluate_type = serializers.IntegerField(required=False, help_text="type from evaluate")
qualitity = serializers.SerializerMethodField()
recommendations = serializers.SerializerMethodField()
count_logs = serializers.SerializerMethodField()

def get_intents_list(self, obj):
return RepositoryEvaluateResultIntentSerializer(
Expand Down Expand Up @@ -297,3 +311,37 @@ def filter_intent(log, intent, min_confidence, max_confidence):
}

return {"total_pages": 0, "current_page": 1, "results": []}

# Serializer method field: exposes the ``accuracy`` attribute of
# ``obj.intent_results``.
def get_accuracy(self, obj):
return obj.intent_results.accuracy

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this line contains whitespace:

for more details: https://www.flake8rules.com/rules/W293.html

def get_qualitity(self, obj):
    """Return the percentage of log entries whose intent check succeeded.

    ``obj.log`` is decoded as a JSON array of entries. An entry counts as a
    success when its ``"intent_status"`` value equals ``"success"``.

    Returns:
        The success rate as a float percentage, or ``0.0`` when the log has
        no entries (a plain division would raise ``ZeroDivisionError``).
    """
    entries = json.loads(obj.log)
    if not entries:
        # Nothing was evaluated, so there is no ratio to compute.
        return 0.0
    hits = sum(1 for entry in entries if entry.get("intent_status") == "success")
    return (hits * 100) / len(entries)

def get_recommendations(self, obj):
    """List the intents that are under-represented in the evaluate log.

    ``obj.log`` is decoded as a JSON array of entries, which are grouped by
    their ``"intent"`` value. An intent is recommended when its number of
    entries is strictly below the mean number of entries per distinct intent.

    Returns:
        ``{"add_phares_to": [...]}`` with intents in first-seen order. The
        list is empty when the log has no entries (dividing by the number of
        distinct intents would otherwise raise ``ZeroDivisionError``).
    """
    per_intent = {}
    for entry in json.loads(obj.log):
        label = entry.get("intent")
        per_intent[label] = per_intent.get(label, 0) + 1

    if not per_intent:
        return {"add_phares_to": []}

    # Mean entries per distinct intent: total entries / number of intents.
    mean_count = sum(per_intent.values()) / len(per_intent)
    below_mean = [label for label, seen in per_intent.items() if seen < mean_count]
    # The key spelling is part of the response payload and is kept as-is.
    return {"add_phares_to": below_mean}

# Serializer method field: decodes ``obj.log`` as JSON and returns the length
# of the decoded value (the number of log entries when it is a list).
def get_count_logs(self, obj):
return len(json.loads(obj.log))
9 changes: 9 additions & 0 deletions bothub/api/v2/repository/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
RequestRepositoryAuthorization,
RepositoryVersionLanguage,
QAKnowledgeBase,
RepositoryNLPLog,
)
from bothub.utils import classifier_choice
from .validators import (
Expand Down Expand Up @@ -486,6 +487,7 @@ class Meta:
"repository_score",
"repository_version_language",
"repository_type",
"has_training",
]
read_only = [
"uuid",
Expand Down Expand Up @@ -617,6 +619,7 @@ class Meta:
repository_type = serializers.CharField(
style={"show": False}, read_only=True, source="repository.repository_type"
)
has_training = serializers.SerializerMethodField()

def get_authorizations(self, obj):
auths = RepositoryAuthorization.objects.filter(
Expand Down Expand Up @@ -874,6 +877,12 @@ def get_repository_score(self, obj):
# Serializer method field: returns the ``id`` and ``language`` values of every
# entry in ``obj.repositoryversionlanguage_set``.
def get_repository_version_language(self, obj):
return obj.repositoryversionlanguage_set.all().values("id", "language")

# Serializer method field: True when at least one RepositoryNLPLog exists for
# any version language belonging to ``obj.repository``.
# NOTE(review): despite the name, this checks for NLP logs rather than for a
# training record — confirm that a log implies the repository was trained.
def get_has_training(self, obj):
logs = RepositoryNLPLog.objects.filter(
repository_version_language__repository_version__repository=obj.repository
)
# exists() only asks the database whether a matching row is present.
return logs.exists()


class RepositoryTrainInfoSerializer(serializers.ModelSerializer):
class Meta:
Expand Down
107 changes: 93 additions & 14 deletions bothub/api/v2/repository/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
RepositoryVote,
RequestRepositoryAuthorization,
RepositoryVersionLanguage,
RepositoryEvaluate,
RepositoryEvaluateResult,
Organization,
)

Expand Down Expand Up @@ -127,6 +129,8 @@
ConnectRESTClient as ConnectClient,
)

from bothub.utils import levenshtein_distance

User = get_user_model()


Expand Down Expand Up @@ -715,21 +719,55 @@ def evaluate(self, request, **kwargs):
user_authorization = repository.get_user_authorization(request.user)
if not user_authorization.can_write:
raise PermissionDenied()
serializer = EvaluateSerializer(data=request.data) # pragma: no cover
serializer.is_valid(raise_exception=True) # pragma: no cover

try:
request = repository.request_nlp_manual_evaluate( # pragma: no cover
user_authorization, serializer.data
)
except DjangoValidationError as e:
raise APIException(e.message, code=400)
data = request.data
response = []
version_languages = RepositoryVersionLanguage.objects.filter(repository_version__pk=data.get("repository_version"))

@elitonzky elitonzky May 26, 2023

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is too long (it exceeds 119 characters); you can wrap it like this:

version_languages = RepositoryVersionLanguage.objects.filter(
    repository_version__pk=data.get("repository_version")
)

for version_language in version_languages:
if not repository.have_at_least_one_test_phrase_registered(version_language.language):
continue
if "language" in data:
data["language"] = version_language.language
else:
data.update({"language": version_language.language})
serializer = EvaluateSerializer(data=data) # pragma: no cover
serializer.is_valid(raise_exception=True) # pragma: no cover

if request.status_code != status.HTTP_200_OK: # pragma: no cover
raise APIException(
{"status_code": request.status_code}, code=request.status_code
) # pragma: no cover
return Response(request.json()) # pragma: no cover
try:
nlp_request = repository.request_nlp_manual_evaluate( # pragma: no cover
user_authorization, serializer.data
)
except DjangoValidationError as e:
raise APIException(e.message, code=400)

if nlp_request.status_code != status.HTTP_200_OK: # pragma: no cover
raise APIException(
{"status_code": nlp_request.status_code}, code=nlp_request.status_code
) # pragma: no cover

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this line contains whitespace:

for more details: https://www.flake8rules.com/rules/W293.html

nlp_response = nlp_request.json()

evaluate_id = nlp_response.get("evaluate_id")
evaluate_result = RepositoryEvaluateResult.objects.get(pk=evaluate_id)
if request.data.get("evaluate_type", False):
evaluate_result.evaluate_type = request.data.get("evaluate_type")
evaluate_result.save()
logs = json.loads(evaluate_result.log)
intent_count = 0
intent_success = 0

for res in logs:
intent_count += 1
intent_success += 1 if res.get("intent_status") == "success" else 0

result_data = {
"accuracy": evaluate_result.intent_results.accuracy,
"intents_count": intent_count,
"intents_success": intent_success,
"evalute_type": evaluate_result.evaluate_type,
}
nlp_response.update(result_data)
response.append(nlp_response)
return Response(response) # pragma: no cover

@action(
detail=True,
Expand Down Expand Up @@ -789,6 +827,47 @@ def check_can_automatic_evaluate(self, request, **kwargs):
return Response(response) # pragma: no cover


@action(

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two blank lines before this method; by convention we use one blank line between methods and two between classes.

detail=True,
methods=["GET"],
url_name="get-recommendations-repository",
)
def get_recommendations_repository(self, request, **kwargs):
repository = self.get_object()
user_authorization = repository.get_user_authorization(request.user)
if not user_authorization.can_write:
raise PermissionDenied()

examples = RepositoryExample.objects.filter(repository_version_language__repository_version__repository=repository)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is too long (it exceeds 119 characters); you can wrap it like this:

        examples = RepositoryExample.objects.filter(
            repository_version_language__repository_version__repository=repository
        )

intents = {}
sum_intents = 0
qnt_intents = 0
sum_distance = 0

for example in examples:
if example.intent.text not in intents:
intents[example.intent.text] = {"text": [], "count": 0, "distance": 0}
intents[example.intent.text]["text"].append(example.text)
intents[example.intent.text]["count"] += 1
sum_intents += 1
qnt_intents += 1
response = {"add_phares_to": [], "more_diversity": []}
avg_intents = (sum_intents/qnt_intents)
for intent in intents:
for i in range(0, intents[intent]['count']):
for j in range(i, intents[intent]['count']):
intents[intent]['distance'] += levenshtein_distance(intents[intent]['text'][i], intents[intent]['text'][j])

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is too long (it exceeds 119 characters); you can wrap it like this:

                for j in range(i, intents[intent]["count"]):
                    intents[intent]["distance"] += levenshtein_distance(
                        intents[intent]["text"][i], intents[intent]["text"][j]
                    )

sum_distance += intents[intent]['distance']

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this line contains whitespace:

for more details: https://www.flake8rules.com/rules/W293.html

avg_distance = sum_distance / qnt_intents
for intent in intents:
if intents[intent]['count'] < avg_intents:
response["add_phares_to"].append(intent)
if intents[intent]['distance'] < avg_distance:
response["more_diversity"].append(intent)
return Response(data=response)


@method_decorator(
name="list",
decorator=swagger_auto_schema(
Expand Down
24 changes: 24 additions & 0 deletions bothub/common/migrations/0118_auto_20230517_1711.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 3.2.15 on 2023-05-17 17:11

from django.db import migrations, models
import uuid


# Schema migration for the ``common`` app: adds ``evaluate_type`` to
# ``repositoryevaluateresult`` and re-declares the default of
# ``zeroshotoptions.option_uuid``.
class Migration(migrations.Migration):

# Must be applied after migration 0117 of the same app.
dependencies = [
('common', '0117_alter_zeroshotoptions_option_uuid'),
]

operations = [
# New optional integer field with choices 0 = manual, 1 = automatic and
# a default of 0.
# NOTE(review): verbose_name is 'role' although the field is
# ``evaluate_type`` — looks copied from another field; confirm.
migrations.AddField(
model_name='repositoryevaluateresult',
name='evaluate_type',
field=models.PositiveIntegerField(blank=True, choices=[(0, 'manual'), (1, 'automatic')], default=0, null=True, verbose_name='role'),
),
# NOTE(review): the default is a literal UUID, so every row created
# without an explicit value would share it. This pattern usually means
# the model passes ``uuid.uuid4()`` instead of the callable
# ``uuid.uuid4`` — confirm against the model definition.
migrations.AlterField(
model_name='zeroshotoptions',
name='option_uuid',
field=models.UUIDField(default=uuid.UUID('4e0cf37b-a6b2-40fd-a3cb-cce5bda8e14e')),
),
]
12 changes: 12 additions & 0 deletions bothub/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2381,6 +2381,18 @@ class Meta:

cross_validation = models.BooleanField(_("cross validation"), default=False)

# Integer codes stored in ``evaluate_type``.
TYPE_MANUAL = 0
TYPE_AUTOMATIC = 1

# (stored value, label) pairs offered as choices for ``evaluate_type``.
EVALUATE_TYPES_CHOICE = [
(TYPE_MANUAL, "manual"),
(TYPE_AUTOMATIC, "automatic")
]

# Optional field (blank and null allowed) that defaults to TYPE_MANUAL.
# NOTE(review): the verbose name is "role" — presumably copied from another
# field; confirm before renaming, since changing it would need a new migration.
evaluate_type = models.PositiveIntegerField(
_("role"), choices=EVALUATE_TYPES_CHOICE, default=TYPE_MANUAL, blank=True, null=True
)

def save(self, *args, **kwargs):
repository = self.repository_version_language.repository_version.repository
self.version = repository.evaluations_results().count() + 1
Expand Down
26 changes: 26 additions & 0 deletions bothub/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,3 +512,29 @@ def check_module_permission(claims, user):
"categories_list",
"repository_type",
]


def levenshtein_distance(str1, str2):

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines with whitespace before an opening bracket (e.g. `np.zeros (` or `matrix [`):
520, 522, 524, 529, 535.
Lines missing whitespace after ",":
529, 535, 536, 537, 538.
You could write it like this:

def levenshtein_distance(str1, str2):
    size_x = len(str1) + 1
    size_y = len(str2) + 1
    matrix = np.zeros((size_x, size_y))
    for x in range(size_x):
        matrix[x, 0] = x
    for y in range(size_y):
        matrix[0, y] = y

    for x in range(1, size_x):
        for y in range(1, size_y):
            if str1[x - 1] == str2[y - 1]:
                matrix[x, y] = min(
                    matrix[x - 1, y] + 1, matrix[x - 1, y - 1], matrix[x, y - 1] + 1
                )
            else:
                matrix[x, y] = min(
                    matrix[x - 1, y] + 1, matrix[x - 1, y - 1] + 1, matrix[x, y - 1] + 1
                )
    return matrix[size_x - 1, size_y - 1]

size_x = len(str1) + 1
size_y = len(str2) + 1
matrix = np.zeros ((size_x, size_y))
for x in range(size_x):
matrix [x, 0] = x
for y in range(size_y):
matrix [0, y] = y

for x in range(1, size_x):
for y in range(1, size_y):
if str1[x-1] == str2[y-1]:
matrix [x,y] = min(
matrix[x-1, y] + 1,
matrix[x-1, y-1],
matrix[x, y-1] + 1
)
else:
matrix [x,y] = min(
matrix[x-1,y] + 1,
matrix[x-1,y-1] + 1,
matrix[x,y-1] + 1
)
return (matrix[size_x - 1, size_y - 1])