diff --git a/docs/source/conf.py b/docs/source/conf.py
index 39e0d881..4371c927 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -25,7 +25,7 @@ version_file = '../../modelscope/version.py'
 
 
 def get_version():
-    with open(version_file, 'r') as f:
+    with open(version_file, 'r', encoding='utf-8') as f:
         exec(compile(f.read(), version_file, 'exec'))
     return locals()['__version__']
 
diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index f2ff822d..17c21d44 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -739,7 +739,7 @@ class ModelScopeConfig:
             with open(
                     os.path.join(ModelScopeConfig.path_credential,
                                  ModelScopeConfig.USER_INFO_FILE_NAME),
-                    'r') as f:
+                    'r', encoding='utf-8') as f:
                 info = f.read()
                 return info.split(':')[0], info.split(':')[1]
         except FileNotFoundError:
@@ -760,7 +760,7 @@ class ModelScopeConfig:
             with open(
                     os.path.join(ModelScopeConfig.path_credential,
                                  ModelScopeConfig.GIT_TOKEN_FILE_NAME),
-                    'r') as f:
+                    'r', encoding='utf-8') as f:
                 token = f.read()
         except FileNotFoundError:
             pass
diff --git a/modelscope/models/audio/tts/models/datasets/kantts_data4fs.py b/modelscope/models/audio/tts/models/datasets/kantts_data4fs.py
index cc47d0c4..9378c32a 100644
--- a/modelscope/models/audio/tts/models/datasets/kantts_data4fs.py
+++ b/modelscope/models/audio/tts/models/datasets/kantts_data4fs.py
@@ -21,7 +21,7 @@ class KanTtsText2MelDataset(Dataset):
 
         self.cache = cache
 
-        with open(config_filename) as f:
+        with open(config_filename, encoding='utf-8') as f:
             self._config = json.loads(f.read())
 
         # Load metadata:
diff --git a/modelscope/models/audio/tts/sambert_hifi.py b/modelscope/models/audio/tts/sambert_hifi.py
index a9b55795..9a14219e 100644
--- a/modelscope/models/audio/tts/sambert_hifi.py
+++ b/modelscope/models/audio/tts/sambert_hifi.py
@@ -60,7 +60,7 @@ class SambertHifigan(Model):
         with zipfile.ZipFile(zip_file, 'r') as zip_ref:
             zip_ref.extractall(model_dir)
         voice_cfg_path = os.path.join(self.__voice_path, 'voices.json')
-        with open(voice_cfg_path, 'r') as f:
+        with open(voice_cfg_path, 'r', encoding='utf-8') as f:
             voice_cfg = json.load(f)
         if 'voices' not in voice_cfg:
             raise TtsModelConfigurationException(
diff --git a/modelscope/models/cv/tinynas_classfication/plain_net_utils.py b/modelscope/models/cv/tinynas_classfication/plain_net_utils.py
index 844535ed..1f5c8852 100644
--- a/modelscope/models/cv/tinynas_classfication/plain_net_utils.py
+++ b/modelscope/models/cv/tinynas_classfication/plain_net_utils.py
@@ -39,7 +39,7 @@ class PlainNet(nn.Module):
                 plainnet_struct_txt = self.module_opt.plainnet_struct_txt
 
             if plainnet_struct_txt is not None:
-                with open(plainnet_struct_txt, 'r') as fid:
+                with open(plainnet_struct_txt, 'r', encoding='utf-8') as fid:
                     the_line = fid.readlines()[0].strip()
                     self.plainnet_struct = the_line
                 pass
diff --git a/modelscope/models/multi_modal/clip/bert_tokenizer.py b/modelscope/models/multi_modal/clip/bert_tokenizer.py
index 8d356f42..1ee715c9 100644
--- a/modelscope/models/multi_modal/clip/bert_tokenizer.py
+++ b/modelscope/models/multi_modal/clip/bert_tokenizer.py
@@ -120,7 +120,7 @@ def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
     index = 0
-    with open(vocab_file, 'r') as reader:
+    with open(vocab_file, 'r', encoding='utf-8') as reader:
         while True:
             token = convert_to_unicode(reader.readline())
             if not token:
diff --git a/modelscope/models/multi_modal/clip/model.py b/modelscope/models/multi_modal/clip/model.py
index 9b82e4a1..c2d82dca 100644
--- a/modelscope/models/multi_modal/clip/model.py
+++ b/modelscope/models/multi_modal/clip/model.py
@@ -523,8 +523,10 @@ class CLIPForMultiModalEmbedding(TorchModel):
         logger.info(f'Loading text model config from {text_model_config_file}')
         assert os.path.exists(text_model_config_file)
 
-        with open(vision_model_config_file,
-                  'r') as fv, open(text_model_config_file, 'r') as ft:
+        with open(
+                vision_model_config_file, 'r',
+                encoding='utf-8') as fv,\
+                open(text_model_config_file, 'r', encoding='utf-8') as ft:
             self.model_info = json.load(fv)
             for k, v in json.load(ft).items():
                 self.model_info[k] = v
diff --git a/modelscope/models/multi_modal/diffusion/model.py b/modelscope/models/multi_modal/diffusion/model.py
index 4229391f..5150a0c3 100644
--- a/modelscope/models/multi_modal/diffusion/model.py
+++ b/modelscope/models/multi_modal/diffusion/model.py
@@ -76,7 +76,7 @@ class DiffusionModel(nn.Module):
         super(DiffusionModel, self).__init__()
         # including text and generator config
         model_config = json.load(
-            open('{}/model_config.json'.format(model_dir)))
+            open('{}/model_config.json'.format(model_dir), encoding='utf-8'))
 
         # text encoder
         text_config = model_config['text_config']
@@ -142,7 +142,9 @@ class DiffusionForTextToImageSynthesis(Model):
 
         # diffusion process
         diffusion_params = json.load(
-            open('{}/diffusion_config.json'.format(model_dir)))
+            open(
+                '{}/diffusion_config.json'.format(model_dir),
+                encoding='utf-8'))
         self.diffusion_generator = make_diffusion(
             **diffusion_params['generator_config'])
         self.diffusion_upsampler_256 = make_diffusion(
diff --git a/modelscope/models/multi_modal/diffusion/structbert.py b/modelscope/models/multi_modal/diffusion/structbert.py
index d5d678ed..16c1407f 100644
--- a/modelscope/models/multi_modal/diffusion/structbert.py
+++ b/modelscope/models/multi_modal/diffusion/structbert.py
@@ -130,7 +130,7 @@ class BertConfig(object):
     @classmethod
     def from_json_file(cls, json_file):
         """Constructs a `BertConfig` from a json file of parameters."""
-        with open(json_file, 'r') as reader:
+        with open(json_file, 'r', encoding='utf-8') as reader:
             text = reader.read()
         return cls.from_dict(json.loads(text))
 
diff --git a/modelscope/models/multi_modal/diffusion/tokenizer.py b/modelscope/models/multi_modal/diffusion/tokenizer.py
index 82c09661..e2c951b1 100644
--- a/modelscope/models/multi_modal/diffusion/tokenizer.py
+++ b/modelscope/models/multi_modal/diffusion/tokenizer.py
@@ -67,7 +67,7 @@ def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
     index = 0
-    with open(vocab_file, 'r') as reader:
+    with open(vocab_file, 'r', encoding='utf-8') as reader:
         while True:
             token = convert_to_unicode(reader.readline())
             if not token:
diff --git a/modelscope/models/multi_modal/gemm/gemm_base.py b/modelscope/models/multi_modal/gemm/gemm_base.py
index 806c469c..c77a682a 100644
--- a/modelscope/models/multi_modal/gemm/gemm_base.py
+++ b/modelscope/models/multi_modal/gemm/gemm_base.py
@@ -522,7 +522,9 @@ class GEMMModel(nn.Module):
 
     def __init__(self, model_dir):
         super().__init__()
-        with open('{}/encoder_config.json'.format(model_dir), 'r') as f:
+        with open(
+                '{}/encoder_config.json'.format(model_dir), 'r',
+                encoding='utf-8') as f:
             model_config = json.loads(f.read())
         model_name = list(model_config.keys())[0]
         config_args = model_config[model_name]
diff --git a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py
index 0cc040c6..813f750e 100644
--- a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py
+++ b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py
@@ -35,7 +35,9 @@ class VideoCLIPForMultiModalEmbedding(TorchModel):
     def __init__(self, model_dir, **kwargs):
         super().__init__(model_dir=model_dir, **kwargs)
         # model config parameters
-        with open(f'{model_dir}/{ModelFile.CONFIGURATION}', 'r') as json_file:
+        with open(
+                f'{model_dir}/{ModelFile.CONFIGURATION}', 'r',
+                encoding='utf-8') as json_file:
             model_config = json.load(json_file)
         model_config = model_config['paras']
         model_config['model_dir'] = model_dir
diff --git a/modelscope/models/multi_modal/mplug/configuration_mplug.py b/modelscope/models/multi_modal/mplug/configuration_mplug.py
index 914678c5..946ebb82 100644
--- a/modelscope/models/multi_modal/mplug/configuration_mplug.py
+++ b/modelscope/models/multi_modal/mplug/configuration_mplug.py
@@ -111,6 +111,6 @@ class MPlugConfig(PretrainedConfig):
     @classmethod
     def from_yaml_file(cls, yaml_file: Union[str,
                                              os.PathLike]) -> Dict[str, Any]:
-        with open(yaml_file, 'r') as reader:
+        with open(yaml_file, 'r', encoding='utf-8') as reader:
             config_dict = yaml.load(reader, Loader=yaml.Loader)
         return cls(**config_dict)
diff --git a/modelscope/models/multi_modal/multi_stage_diffusion/model.py b/modelscope/models/multi_modal/multi_stage_diffusion/model.py
index 59bd837d..58fd6698 100644
--- a/modelscope/models/multi_modal/multi_stage_diffusion/model.py
+++ b/modelscope/models/multi_modal/multi_stage_diffusion/model.py
@@ -50,7 +50,8 @@ class UnCLIP(nn.Module):
     def __init__(self, model_dir):
         super(UnCLIP, self).__init__()
         self.model_dir = model_dir
-        self.config = json.load(open(f'{model_dir}/{ModelFile.CONFIGURATION}'))
+        self.config = json.load(
+            open(f'{model_dir}/{ModelFile.CONFIGURATION}', encoding='utf-8'))
 
         # modules
         self.clip = CLIP(**self.config['clip']).fp16()
diff --git a/modelscope/models/multi_modal/ofa_for_all_tasks.py b/modelscope/models/multi_modal/ofa_for_all_tasks.py
index fc578b25..77dff54a 100644
--- a/modelscope/models/multi_modal/ofa_for_all_tasks.py
+++ b/modelscope/models/multi_modal/ofa_for_all_tasks.py
@@ -312,7 +312,7 @@ class OfaForAllTasks(TorchModel):
         if self.cfg.model.get('answer2label', None):
             ans2label_file = osp.join(self.model_dir,
                                       self.cfg.model.answer2label)
-            with open(ans2label_file, 'r') as reader:
+            with open(ans2label_file, 'r', encoding='utf-8') as reader:
                 self.ans2label_dict = json.load(reader)
 
     def save_pretrained(self,
diff --git a/modelscope/models/nlp/mglm/arguments.py b/modelscope/models/nlp/mglm/arguments.py
index 13b3aeab..4fa33c65 100755
--- a/modelscope/models/nlp/mglm/arguments.py
+++ b/modelscope/models/nlp/mglm/arguments.py
@@ -743,7 +743,7 @@ def get_args():
 
     if hasattr(args, 'deepspeed'
                ) and args.deepspeed and args.deepspeed_config is not None:
-        with open(args.deepspeed_config) as file:
+        with open(args.deepspeed_config, encoding='utf-8') as file:
             deepspeed_config = json.load(file)
         if 'train_micro_batch_size_per_gpu' in deepspeed_config:
             args.batch_size = deepspeed_config[
diff --git a/modelscope/models/nlp/mglm/data_utils/corpora.py b/modelscope/models/nlp/mglm/data_utils/corpora.py
index 7c6f58f8..cf756c0a 100755
--- a/modelscope/models/nlp/mglm/data_utils/corpora.py
+++ b/modelscope/models/nlp/mglm/data_utils/corpora.py
@@ -156,7 +156,7 @@ class DataReader:
         def read_input_to_queue():
             for path in paths:
                 print_rank_0(f'Start reading {path}')
-                with open(path) as file:
+                with open(path, encoding='utf-8') as file:
                     items = json.load(file)
                     for item in items:
                         task_queue.put(item)
diff --git a/modelscope/models/nlp/mglm/data_utils/datasets.py b/modelscope/models/nlp/mglm/data_utils/datasets.py
index 777b7d43..39ffaea3 100644
--- a/modelscope/models/nlp/mglm/data_utils/datasets.py
+++ b/modelscope/models/nlp/mglm/data_utils/datasets.py
@@ -511,12 +511,12 @@ class json_dataset(data.Dataset):
 
     def load_json_stream(self, load_path):
         if not self.loose_json:
-            jsons = json.load(open(load_path, 'r'))
+            jsons = json.load(open(load_path, 'r', encoding='utf-8'))
             generator = iter(jsons)
         else:
 
             def gen_helper():
-                with open(load_path, 'r') as f:
+                with open(load_path, 'r', encoding='utf-8') as f:
                     for row in f:
                         yield json.loads(row)
 
diff --git a/modelscope/models/nlp/mglm/data_utils/extraction.py b/modelscope/models/nlp/mglm/data_utils/extraction.py
index 53027e4f..da062f34 100644
--- a/modelscope/models/nlp/mglm/data_utils/extraction.py
+++ b/modelscope/models/nlp/mglm/data_utils/extraction.py
@@ -29,7 +29,9 @@ with open(output_path, 'w') as output:
             print(filename)
             article_lines = []
             article_open = False
-            with open(filename, mode='r', newline='\n') as file:
+            with open(
+                    filename, mode='r', newline='\n',
+                    encoding='utf-8') as file:
                 for line in file:
                     line = line.rstrip()
                     if '<doc id=' in line:
diff --git a/modelscope/models/nlp/mglm/data_utils/tokenization_gpt2.py b/modelscope/models/nlp/mglm/data_utils/tokenization_gpt2.py
index d179e055..84947ee8 100644
--- a/modelscope/models/nlp/mglm/data_utils/tokenization_gpt2.py
+++ b/modelscope/models/nlp/mglm/data_utils/tokenization_gpt2.py
@@ -179,7 +179,10 @@ class GPT2Tokenizer(object):
                  special_tokens=None,
                  max_len=None):
         self.max_len = max_len if max_len is not None else int(1e12)
-        self.encoder = json.load(open(vocab_file))
+        # `encoding` is a parameter of open(), not json.load(); the context
+        # manager also closes the vocab file once it has been parsed.
+        with open(vocab_file, encoding='utf-8') as vocab_fp:
+            self.encoder = json.load(vocab_fp)
         self.decoder = {v: k for k, v in self.encoder.items()}
         self.errors = errors  # how to handle errors in decoding
         self.byte_encoder = bytes_to_unicode()
diff --git a/modelscope/models/nlp/mglm/process_grid.py b/modelscope/models/nlp/mglm/process_grid.py
index d425c970..e2d26cce 100644
--- a/modelscope/models/nlp/mglm/process_grid.py
+++ b/modelscope/models/nlp/mglm/process_grid.py
@@ -19,7 +19,7 @@ for dir_path in glob.glob(path_pattern, recursive=True):
     valid_path = os.path.join(dir_path, 'results.json')
     if os.path.exists(valid_path):
         print(entry)
-        with open(valid_path) as file:
+        with open(valid_path, encoding='utf-8') as file:
             valid_result = json.load(file)
     else:
         print(f'{entry} no validation results')
diff --git a/modelscope/models/nlp/mglm/tasks/language_model/dataset.py b/modelscope/models/nlp/mglm/tasks/language_model/dataset.py
index cfdfa714..f2ecec37 100644
--- a/modelscope/models/nlp/mglm/tasks/language_model/dataset.py
+++ b/modelscope/models/nlp/mglm/tasks/language_model/dataset.py
@@ -121,7 +121,7 @@ class LambadaDataset(torch.utils.data.Dataset):
 
         self.tokens = []
         self.labels = []
-        with open(data_path, 'r') as f:
+        with open(data_path, 'r', encoding='utf-8') as f:
             for line in f.readlines():
                 text = json.loads(line)['text']
                 tokens, labels = self.get_tokens(text)
diff --git a/modelscope/models/nlp/mglm/tasks/seq2seq/dataset.py b/modelscope/models/nlp/mglm/tasks/seq2seq/dataset.py
index 6a4e275f..0e209e1a 100644
--- a/modelscope/models/nlp/mglm/tasks/seq2seq/dataset.py
+++ b/modelscope/models/nlp/mglm/tasks/seq2seq/dataset.py
@@ -209,14 +209,16 @@ class XSumProcessor:
             raise NotImplementedError(split)
         print_rank_0(f'Creating XSUM-{split} dataset from {self.data_dir}')
         with open(
-                os.path.join(
-                    self.data_dir,
-                    'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json')) as file:
+                os.path.join(self.data_dir,
+                             'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json'),
+                encoding='utf-8') as file:
             id_list = json.load(file)
         id_list = id_list[key]
         source_texts, target_texts = [], []
         for i, idx in enumerate(id_list):
-            with open(os.path.join(self.data_dir, f'{idx}.summary')) as file:
+            with open(
+                    os.path.join(self.data_dir, f'{idx}.summary'),
+                    encoding='utf-8') as file:
                 key, sentences = None, []
                 source_text, target_text = None, None
                 for line in file:
diff --git a/modelscope/models/nlp/mglm/tasks/superglue/dataset.py b/modelscope/models/nlp/mglm/tasks/superglue/dataset.py
index 36367671..da1fb3d8 100644
--- a/modelscope/models/nlp/mglm/tasks/superglue/dataset.py
+++ b/modelscope/models/nlp/mglm/tasks/superglue/dataset.py
@@ -841,7 +841,7 @@ class RaceProcessor(DataProcessor):
             path, 'middle', '*.txt')) + glob.glob(
                 os.path.join(path, 'high', '*.txt'))
         for filename in filenames:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8') as f:
                 for line in f:
                     data = json.loads(line)
                     idx = data['id']
@@ -1127,7 +1127,7 @@ class AgnewsProcessor(DataProcessor):
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []
 
-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             reader = csv.reader(f, delimiter=',')
             for idx, row in enumerate(reader):
                 label, headline, body = row
@@ -1209,7 +1209,7 @@ class YelpPolarityProcessor(DataProcessor):
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []
 
-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             reader = csv.reader(f, delimiter=',')
             for idx, row in enumerate(reader):
                 label, body = row
@@ -1419,7 +1419,7 @@ class SquadProcessor(DataProcessor):
     @staticmethod
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []
-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             data = json.load(f)['data']
 
         for idx, passage in enumerate(data):
diff --git a/modelscope/models/nlp/mglm/tasks/superglue/pvp.py b/modelscope/models/nlp/mglm/tasks/superglue/pvp.py
index ff394172..e149f503 100644
--- a/modelscope/models/nlp/mglm/tasks/superglue/pvp.py
+++ b/modelscope/models/nlp/mglm/tasks/superglue/pvp.py
@@ -538,7 +538,7 @@ class PVP(ABC):
             dict)  # type: Dict[int, Dict[str, List[str]]]
         current_pattern_id = None
 
-        with open(path, 'r') as fh:
+        with open(path, 'r', encoding='utf-8') as fh:
             for line in fh.read().splitlines():
                 if line.isdigit():
                     current_pattern_id = int(line)
diff --git a/modelscope/models/nlp/mglm/utils.py b/modelscope/models/nlp/mglm/utils.py
index 2bfcf8c0..0e781189 100644
--- a/modelscope/models/nlp/mglm/utils.py
+++ b/modelscope/models/nlp/mglm/utils.py
@@ -77,7 +77,7 @@ def print_and_save_args(args, verbose=True, log_dir=None):
         with open(json_file, 'w') as output:
             json.dump(vars(args), output, sort_keys=True)
         if args.deepspeed and args.deepspeed_config is not None:
-            with open(args.deepspeed_config) as file:
+            with open(args.deepspeed_config, encoding='utf-8') as file:
                 deepspeed_config = json.load(file)
             deepspeed_json_file = os.path.join(log_dir,
                                                'config_gpt_large.json')
@@ -324,7 +324,7 @@ def get_checkpoint_iteration(load_path):
         print_rank_0('    will not load any checkpoints and will start from '
                      'random')
         return load_path, 0, False, False
-    with open(tracker_filename, 'r') as f:
+    with open(tracker_filename, 'r', encoding='utf-8') as f:
         metastring = f.read().strip()
         release = metastring == 'release'
         # try:
diff --git a/modelscope/models/science/unifold/data/residue_constants.py b/modelscope/models/science/unifold/data/residue_constants.py
index beebfe89..2701ee38 100644
--- a/modelscope/models/science/unifold/data/residue_constants.py
+++ b/modelscope/models/science/unifold/data/residue_constants.py
@@ -443,7 +443,7 @@ def load_stereo_chemical_props():
     stereo_chemical_props_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)),
         'stereo_chemical_props.txt')
-    with open(stereo_chemical_props_path, 'rt') as f:
+    with open(stereo_chemical_props_path, 'rt', encoding='utf-8') as f:
         stereo_chemical_props = f.read()
     lines_iter = iter(stereo_chemical_props.splitlines())
     # Load bond lengths.
diff --git a/modelscope/models/science/unifold/dataset.py b/modelscope/models/science/unifold/dataset.py
index 29e1a8b0..f14c2ef7 100644
--- a/modelscope/models/science/unifold/dataset.py
+++ b/modelscope/models/science/unifold/dataset.py
@@ -250,7 +250,7 @@ class UnifoldDataset(UnicoreDataset):
         self.path = data_path
 
         def load_json(filename):
-            return json.load(open(filename, 'r'))
+            return json.load(open(filename, 'r', encoding='utf-8'))
 
         sample_weight = load_json(
             os.path.join(self.path,
@@ -400,7 +400,8 @@ class UnifoldMultimerDataset(UnifoldDataset):
         self.pdb_assembly = json.load(
             open(
                 os.path.join(self.data_path,
-                             json_prefix + 'pdb_assembly.json')))
+                             json_prefix + 'pdb_assembly.json'),
+                encoding='utf-8'))
         self.pdb_chains = self.get_chains(self.inverse_multi_label)
         self.monomer_feature_path = os.path.join(self.data_path,
                                                  'pdb_features')
diff --git a/modelscope/models/science/unifold/msa/pipeline.py b/modelscope/models/science/unifold/msa/pipeline.py
index b7889bff..8037e50e 100644
--- a/modelscope/models/science/unifold/msa/pipeline.py
+++ b/modelscope/models/science/unifold/msa/pipeline.py
@@ -99,7 +99,7 @@ def run_msa_tool(
             f.write(result[msa_format])
     else:
         logging.warning('Reading MSA from file %s', msa_out_path)
-        with open(msa_out_path, 'r') as f:
+        with open(msa_out_path, 'r', encoding='utf-8') as f:
             result = {msa_format: f.read()}
     return result
 
@@ -153,7 +153,7 @@ class DataPipeline:
     def process(self, input_fasta_path: str,
                 msa_output_dir: str) -> FeatureDict:
         """Runs alignment tools on the input sequence and creates features."""
-        with open(input_fasta_path) as f:
+        with open(input_fasta_path, encoding='utf-8') as f:
             input_fasta_str = f.read()
         input_seqs, input_descs = parsers.parse_fasta(input_fasta_str)
         if len(input_seqs) != 1:
diff --git a/modelscope/models/science/unifold/msa/templates.py b/modelscope/models/science/unifold/msa/templates.py
index fe3bcef9..d1ff8cf1 100644
--- a/modelscope/models/science/unifold/msa/templates.py
+++ b/modelscope/models/science/unifold/msa/templates.py
@@ -155,7 +155,7 @@ def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]:
     """Parses release dates file, returns a mapping from PDBs to release dates."""
     if path.endswith('txt'):
         release_dates = {}
-        with open(path, 'r') as f:
+        with open(path, 'r', encoding='utf-8') as f:
             for line in f:
                 pdb_id, date = line.split(':')
                 date = date.strip()
diff --git a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py
index 68cbf918..49991b11 100644
--- a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py
+++ b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py
@@ -106,14 +106,14 @@ class MovieSceneSegmentationDataset(TorchTaskDataset):
         self.tmpl = '{}/shot_{}_img_{}.jpg'  # video_id, shot_id, shot_num
 
         if not self.test_mode:
-            with open(self.ann_file) as f:
+            with open(self.ann_file, encoding='utf-8') as f:
                 self.anno_data = json.load(f)
             self.vidsid2label = {
                 f"{it['video_id']}_{it['shot_id']}": it['boundary_label']
                 for it in self.anno_data
             }
         else:
-            with open(self.ann_file) as f:
+            with open(self.ann_file, encoding='utf-8') as f:
                 self.anno_data = json.load(f)
 
     def init_sampler(self, cfg):
diff --git a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py b/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py
index c90351e9..8b6d22a4 100644
--- a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py
+++ b/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py
@@ -146,7 +146,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset):
         saved_annotations_file_path = osp.join(
             root_path, f'sentences_single_frame_{subset}_annotations.json')
         if osp.exists(saved_annotations_file_path):
-            with open(saved_annotations_file_path, 'r') as f:
+            with open(saved_annotations_file_path, 'r', encoding='utf-8') as f:
                 text_annotations_by_frame = [tuple(a) for a in json.load(f)]
                 return text_annotations_by_frame
         elif (distributed and dist.get_rank() == 0) or not distributed:
@@ -203,7 +203,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset):
                 json.dump(text_annotations_by_frame, f)
         if distributed:
             dist.barrier()
-            with open(saved_annotations_file_path, 'r') as f:
+            with open(saved_annotations_file_path, 'r', encoding='utf-8') as f:
                 text_annotations_by_frame = [tuple(a) for a in json.load(f)]
         return text_annotations_by_frame
 
@@ -267,8 +267,10 @@ def get_text_annotations_gt(root_path, subset):
         osp.join(root_path, 'Release/videoset.csv'), header=None)
     # 'vid', 'label', 'start_time', 'end_time', 'height', 'width', 'total_frames', 'annotated_frames', 'subset'
     a2d_data_info.columns = ['vid', '', '', '', '', '', '', '', 'subset']
-    with open(osp.join(root_path, 'text_annotations/missed_videos.txt'),
-              'r') as f:
+    with open(
+            osp.join(root_path, 'text_annotations/missed_videos.txt'),
+            'r',
+            encoding='utf-8') as f:
         unused_videos = f.read().splitlines()
     subsets = {'train': 0, 'test': 1}
     # filter unused videos and videos which do not belong to our train/test subset:
diff --git a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py b/modelscope/msdatasets/task_datasets/video_summarization_dataset.py
index 34eb0450..02639be8 100644
--- a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py
+++ b/modelscope/msdatasets/task_datasets/video_summarization_dataset.py
@@ -26,7 +26,7 @@ class VideoSummarizationDataset(TorchTaskDataset):
         self.list_n_frames = []
         self.list_positions = []
 
-        with open(self.split_filename) as f:
+        with open(self.split_filename, encoding='utf-8') as f:
             data = json.loads(f.read())
             for i, split in enumerate(data):
                 if i == self.split_index:
diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py
index 6a4864bf..da339083 100644
--- a/modelscope/pipelines/audio/asr_inference_pipeline.py
+++ b/modelscope/pipelines/audio/asr_inference_pipeline.py
@@ -116,7 +116,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
         }
 
         if self.framework == Frameworks.torch:
-            config_file = open(inputs['asr_model_config'])
+            config_file = open(inputs['asr_model_config'], encoding='utf-8')
             root = yaml.full_load(config_file)
             config_file.close()
             frontend_conf = None
diff --git a/modelscope/pipelines/cv/animal_recognition_pipeline.py b/modelscope/pipelines/cv/animal_recognition_pipeline.py
index 671a5b4c..6d395a46 100644
--- a/modelscope/pipelines/cv/animal_recognition_pipeline.py
+++ b/modelscope/pipelines/cv/animal_recognition_pipeline.py
@@ -109,7 +109,7 @@ class AnimalRecognitionPipeline(Pipeline):
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.local_path, 'label_mapping.txt')
-        with open(label_mapping_path, 'r') as f:
+        with open(label_mapping_path, 'r', encoding='utf-8') as f:
             label_mapping = f.readlines()
         score = torch.max(inputs['outputs'])
         inputs = {
diff --git a/modelscope/pipelines/cv/general_recognition_pipeline.py b/modelscope/pipelines/cv/general_recognition_pipeline.py
index 80f6f88a..c1136882 100644
--- a/modelscope/pipelines/cv/general_recognition_pipeline.py
+++ b/modelscope/pipelines/cv/general_recognition_pipeline.py
@@ -110,7 +110,7 @@ class GeneralRecognitionPipeline(Pipeline):
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.local_path, 'meta_info.txt')
-        with open(label_mapping_path, 'r') as f:
+        with open(label_mapping_path, 'r', encoding='utf-8') as f:
             label_mapping = f.readlines()
         score = torch.max(inputs['outputs'])
         inputs = {
diff --git a/modelscope/pipelines/cv/ocr_recognition_pipeline.py b/modelscope/pipelines/cv/ocr_recognition_pipeline.py
index e81467a1..d90f8db6 100644
--- a/modelscope/pipelines/cv/ocr_recognition_pipeline.py
+++ b/modelscope/pipelines/cv/ocr_recognition_pipeline.py
@@ -49,7 +49,7 @@ class OCRRecognitionPipeline(Pipeline):
         self.infer_model.load_state_dict(
             torch.load(model_path, map_location=self.device))
         self.labelMapping = dict()
-        with open(label_path, 'r') as f:
+        with open(label_path, 'r', encoding='utf-8') as f:
             lines = f.readlines()
             cnt = 2
             for line in lines:
diff --git a/modelscope/pipelines/cv/tinynas_classification_pipeline.py b/modelscope/pipelines/cv/tinynas_classification_pipeline.py
index a470e58b..4dfd5c51 100644
--- a/modelscope/pipelines/cv/tinynas_classification_pipeline.py
+++ b/modelscope/pipelines/cv/tinynas_classification_pipeline.py
@@ -82,7 +82,7 @@ class TinynasClassificationPipeline(Pipeline):
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.path, 'label_map.txt')
-        f = open(label_mapping_path)
+        f = open(label_mapping_path, encoding='utf-8')
         content = f.read()
         f.close()
         label_dict = eval(content)
diff --git a/modelscope/pipelines/cv/video_category_pipeline.py b/modelscope/pipelines/cv/video_category_pipeline.py
index e4c73649..4c52205e 100644
--- a/modelscope/pipelines/cv/video_category_pipeline.py
+++ b/modelscope/pipelines/cv/video_category_pipeline.py
@@ -36,7 +36,7 @@ class VideoCategoryPipeline(Pipeline):
         super().__init__(model=model, **kwargs)
         config_path = osp.join(self.model, ModelFile.CONFIGURATION)
         logger.info(f'loading configuration from {config_path}')
-        with open(config_path, 'r') as f:
+        with open(config_path, 'r', encoding='utf-8') as f:
             config = json.load(f)
             self.frame_num = config['frame_num']
             self.level_1_num = config['level_1_num']
diff --git a/modelscope/pipelines/science/protein_structure_pipeline.py b/modelscope/pipelines/science/protein_structure_pipeline.py
index 3dc51c72..1ef9aa29 100644
--- a/modelscope/pipelines/science/protein_structure_pipeline.py
+++ b/modelscope/pipelines/science/protein_structure_pipeline.py
@@ -59,8 +59,9 @@ def load_feature_for_one_target(
 
     else:
         uniprot_msa_dir = data_folder
-        sequence_ids = open(os.path.join(data_folder,
-                                         'chains.txt')).readline().split()
+        sequence_ids = open(
+            os.path.join(data_folder, 'chains.txt'),
+            encoding='utf-8').readline().split()
 
     if symmetry_group is None:
         batch, _ = load_and_process(
diff --git a/modelscope/preprocessors/audio.py b/modelscope/preprocessors/audio.py
index 1e659218..f02381ad 100644
--- a/modelscope/preprocessors/audio.py
+++ b/modelscope/preprocessors/audio.py
@@ -15,7 +15,7 @@ from modelscope.utils.constant import Fields
 
 
 def load_kaldi_feature_transform(filename):
-    fp = open(filename, 'r')
+    fp = open(filename, 'r', encoding='utf-8')
     all_str = fp.read()
     pos1 = all_str.find('AddShift')
     pos2 = all_str.find('[', pos1)
diff --git a/modelscope/preprocessors/kws.py b/modelscope/preprocessors/kws.py
index 6f09d545..33847702 100644
--- a/modelscope/preprocessors/kws.py
+++ b/modelscope/preprocessors/kws.py
@@ -78,7 +78,7 @@ class WavToLists(Preprocessor):
         assert os.path.exists(
             inputs['config_path']), 'model config yaml file does not exist'
 
-        config_file = open(inputs['config_path'])
+        config_file = open(inputs['config_path'], encoding='utf-8')
         root = yaml.full_load(config_file)
         config_file.close()
 
diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py
index 3a3ae820..52cde61c 100644
--- a/modelscope/preprocessors/multi_modal.py
+++ b/modelscope/preprocessors/multi_modal.py
@@ -145,8 +145,9 @@ class CLIPPreprocessor(Preprocessor):
             self.image_resolution = kwargs['resolution']
         else:
             self.image_resolution = json.load(
-                open('{}/vision_model_config.json'.format(
-                    model_dir)))['image_resolution']
+                open(
+                    '{}/vision_model_config.json'.format(model_dir),
+                    encoding='utf-8'))['image_resolution']
         self.img_preprocess = self._build_image_transform()
         # key mapping
         # specify the input keys, compatible with training and inference whose key names may be different
diff --git a/modelscope/preprocessors/nlp/nlp_base.py b/modelscope/preprocessors/nlp/nlp_base.py
index 45efc6e7..7fe28eb5 100644
--- a/modelscope/preprocessors/nlp/nlp_base.py
+++ b/modelscope/preprocessors/nlp/nlp_base.py
@@ -59,8 +59,10 @@ class NLPBasePreprocessor(Preprocessor, ABC):
             self.use_fast = False
         elif self.use_fast is None and os.path.isfile(
                 os.path.join(model_dir, 'tokenizer_config.json')):
-            with open(os.path.join(model_dir, 'tokenizer_config.json'),
-                      'r') as f:
+            with open(
+                    os.path.join(model_dir, 'tokenizer_config.json'),
+                    'r',
+                    encoding='utf-8') as f:
                 json_config = json.load(f)
                 self.use_fast = json_config.get('use_fast')
         self.use_fast = False if self.use_fast is None else self.use_fast
diff --git a/modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py b/modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py
index 2923157e..5aa662fc 100644
--- a/modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py
+++ b/modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py
@@ -35,7 +35,10 @@ class DialogIntentPredictionPreprocessor(Preprocessor):
             self.model_dir, config=self.config)
 
         self.categories = None
-        with open(os.path.join(self.model_dir, 'categories.json'), 'r') as f:
+        with open(
+                os.path.join(self.model_dir, 'categories.json'),
+                'r',
+                encoding='utf-8') as f:
             self.categories = json.load(f)
         assert len(self.categories) == 77
 
diff --git a/modelscope/preprocessors/nlp/space/dst_processors.py b/modelscope/preprocessors/nlp/space/dst_processors.py
index 1f9920a9..1b6159b5 100644
--- a/modelscope/preprocessors/nlp/space/dst_processors.py
+++ b/modelscope/preprocessors/nlp/space/dst_processors.py
@@ -184,7 +184,7 @@ class multiwoz22Processor(DSTProcessor):
     # Loads the dialogue_acts.json and returns a list
     # of slot-value pairs.
     def load_acts(self, input_file):
-        with open(input_file) as f:
+        with open(input_file, encoding='utf-8') as f:
             acts = json.load(f)
         s_dict = {}
         for d in acts:
diff --git a/modelscope/preprocessors/nlp/space/fields/gen_field.py b/modelscope/preprocessors/nlp/space/fields/gen_field.py
index 1d1879fe..20b2c48a 100644
--- a/modelscope/preprocessors/nlp/space/fields/gen_field.py
+++ b/modelscope/preprocessors/nlp/space/fields/gen_field.py
@@ -359,12 +359,14 @@ class MultiWOZBPETextField(BPETextField):
             test_list = [
                 line.strip().lower() for line in open(
                     os.path.join(kwargs['data_dir'], 'testListFile.json'),
-                    'r').readlines()
+                    'r',
+                    encoding='utf-8').readlines()
             ]
             dev_list = [
                 line.strip().lower() for line in open(
                     os.path.join(kwargs['data_dir'], 'valListFile.json'),
-                    'r').readlines()
+                    'r',
+                    encoding='utf-8').readlines()
             ]
 
             self.dev_files, self.test_files = {}, {}
diff --git a/modelscope/preprocessors/nlp/space/tokenizer.py b/modelscope/preprocessors/nlp/space/tokenizer.py
index 1bd0ce11..798ce3b7 100644
--- a/modelscope/preprocessors/nlp/space/tokenizer.py
+++ b/modelscope/preprocessors/nlp/space/tokenizer.py
@@ -531,7 +531,7 @@ class GPT2Tokenizer(object):
                  special_tokens=None,
                  max_len=None):
         self.max_len = max_len if max_len is not None else int(1e12)
-        self.encoder = json.load(open(vocab_file))
+        self.encoder = json.load(open(vocab_file, encoding='utf-8'))
         self.decoder = {v: k for k, v in self.encoder.items()}
         self.errors = errors  # how to handle errors in decoding
         self.byte_encoder = bytes_to_unicode()
diff --git a/modelscope/preprocessors/nlp/space_T_cn/fields/database.py b/modelscope/preprocessors/nlp/space_T_cn/fields/database.py
index 5ceb5c79..1300cc95 100644
--- a/modelscope/preprocessors/nlp/space_T_cn/fields/database.py
+++ b/modelscope/preprocessors/nlp/space_T_cn/fields/database.py
@@ -32,12 +32,12 @@ class Database:
         tables = {}
         lines = []
         if type(table_file_path) == str:
-            with open(table_file_path, 'r') as fo:
+            with open(table_file_path, 'r', encoding='utf-8') as fo:
                 for line in fo:
                     lines.append(line)
         elif type(table_file_path) == list:
             for path in table_file_path:
-                with open(path, 'r') as fo:
+                with open(path, 'r', encoding='utf-8') as fo:
                     for line in fo:
                         lines.append(line)
         else:
diff --git a/modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py b/modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py
index 00c7bcd7..0ebd857e 100644
--- a/modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py
+++ b/modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py
@@ -45,7 +45,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor):
             and torch.cuda.is_available() else 'cpu'
         self.processor = None
         self.table_path = os.path.join(self.model_dir, 'tables.json')
-        self.tables = json.load(open(self.table_path, 'r'))
+        self.tables = json.load(open(self.table_path, 'r', encoding='utf-8'))
         self.output_tables = None
         self.path_cache = []
         self.graph_processor = GraphProcessor()
@@ -89,7 +89,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor):
                 'local_db_path'] not in self.path_cache:
             self.path_cache.append(data['local_db_path'])
             path = os.path.join(data['local_db_path'], 'tables.json')
-            self.tables = json.load(open(path, 'r'))
+            self.tables = json.load(open(path, 'r', encoding='utf-8'))
             self.processor.db_dir = os.path.join(data['local_db_path'], 'db')
             self.output_tables = process_tables(self.processor, self.tables)
             Example.configuration(
diff --git a/modelscope/preprocessors/ofa/base.py b/modelscope/preprocessors/ofa/base.py
index 55b3895d..e5c30ff8 100644
--- a/modelscope/preprocessors/ofa/base.py
+++ b/modelscope/preprocessors/ofa/base.py
@@ -76,7 +76,7 @@ class OfaBasePreprocessor:
         self.constraint_trie = None
         if self.cfg.model.get('answer2label', None):
             ans2label_file = osp.join(model_dir, self.cfg.model.answer2label)
-            with open(ans2label_file, 'r') as reader:
+            with open(ans2label_file, 'r', encoding='utf-8') as reader:
                 ans2label_dict = json.load(reader)
             self.ans2label = ans2label_dict
             self.label2ans = {v: k for k, v in self.ans2label.items()}
diff --git a/modelscope/preprocessors/science/uni_fold.py b/modelscope/preprocessors/science/uni_fold.py
index 2a44c885..ae72433c 100644
--- a/modelscope/preprocessors/science/uni_fold.py
+++ b/modelscope/preprocessors/science/uni_fold.py
@@ -201,7 +201,7 @@ def run_mmseqs2(
     a3m_lines = {}
     for a3m_file in a3m_files:
         update_M, M = True, None
-        with open(a3m_file, 'r') as f:
+        with open(a3m_file, 'r', encoding='utf-8') as f:
             lines = f.readlines()
             for line in lines:
                 if len(line) > 0:
diff --git a/modelscope/trainers/nlp/space/eval.py b/modelscope/trainers/nlp/space/eval.py
index f315ff07..2db40cae 100644
--- a/modelscope/trainers/nlp/space/eval.py
+++ b/modelscope/trainers/nlp/space/eval.py
@@ -771,7 +771,8 @@ class CamRestEvaluator(GenericEvaluator):
     def get_entities(self, entity_path):
         entities_flat = []
         entitiy_to_slot_dict = {}
-        raw_entities = json.loads(open(entity_path).read().lower())
+        raw_entities = json.loads(
+            open(entity_path, encoding='utf-8').read().lower())
         for s in raw_entities['informable']:
             entities_flat.extend(raw_entities['informable'][s])
             for v in raw_entities['informable'][s]:
diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py
index 32e2fa54..1ae5c8d2 100644
--- a/modelscope/utils/audio/audio_utils.py
+++ b/modelscope/utils/audio/audio_utils.py
@@ -47,7 +47,7 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
         else:
             return None
 
-    with open(origin_config_file) as f:
+    with open(origin_config_file, encoding='utf-8') as f:
         lines = f.readlines()
     with open(new_config_file, 'w') as f:
         for line in lines:
diff --git a/modelscope/utils/config.py b/modelscope/utils/config.py
index e46da7df..b3512251 100644
--- a/modelscope/utils/config.py
+++ b/modelscope/utils/config.py
@@ -178,7 +178,7 @@ class Config:
         if cfg_text:
             text = cfg_text
         elif filename:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8') as f:
                 text = f.read()
         else:
             text = ''
diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py
index 105b3ffa..93cc20e2 100644
--- a/modelscope/utils/hub.py
+++ b/modelscope/utils/hub.py
@@ -124,7 +124,7 @@ def parse_label_mapping(model_dir):
     label2id = None
     label_path = os.path.join(model_dir, ModelFile.LABEL_MAPPING)
     if os.path.exists(label_path):
-        with open(label_path) as f:
+        with open(label_path, encoding='utf-8') as f:
             label_mapping = json.load(f)
         label2id = {name: idx for name, idx in label_mapping.items()}
 
diff --git a/modelscope/utils/nlp/space/clean_dataset.py b/modelscope/utils/nlp/space/clean_dataset.py
index 2c971b10..cbd0ebde 100644
--- a/modelscope/utils/nlp/space/clean_dataset.py
+++ b/modelscope/utils/nlp/space/clean_dataset.py
@@ -59,7 +59,9 @@ def clean_text(data_dir, text):
                   text)  # 'abc.xyz' -> 'abc . xyz'
     text = re.sub(r'(\w+)\.\.? ', r'\1 . ', text)  # if 'abc. ' -> 'abc . '
 
-    with open(os.path.join(data_dir, 'mapping.pair'), 'r') as fin:
+    with open(
+            os.path.join(data_dir, 'mapping.pair'), 'r',
+            encoding='utf-8') as fin:
         for line in fin.readlines():
             fromx, tox = line.replace('\n', '').split('\t')
             text = ' ' + text + ' '
diff --git a/modelscope/utils/nlp/space/db_ops.py b/modelscope/utils/nlp/space/db_ops.py
index d1d14ef9..27198b23 100644
--- a/modelscope/utils/nlp/space/db_ops.py
+++ b/modelscope/utils/nlp/space/db_ops.py
@@ -15,7 +15,9 @@ class MultiWozDB(object):
         self.dbs = {}
         self.sql_dbs = {}
         for domain in all_domains:
-            with open(os.path.join(db_dir, db_paths[domain]), 'r') as f:
+            with open(
+                    os.path.join(db_dir, db_paths[domain]), 'r',
+                    encoding='utf-8') as f:
                 self.dbs[domain] = json.loads(f.read().lower())
 
     def oneHotVector(self, domain, num):
diff --git a/modelscope/utils/nlp/space/utils.py b/modelscope/utils/nlp/space/utils.py
index 56e67671..70cb03a0 100644
--- a/modelscope/utils/nlp/space/utils.py
+++ b/modelscope/utils/nlp/space/utils.py
@@ -146,9 +146,9 @@ class MultiWOZVocab(object):
 
     def load_vocab(self, vocab_path):
         self._freq_dict = json.loads(
-            open(vocab_path + '.freq.json', 'r').read())
+            open(vocab_path + '.freq.json', 'r', encoding='utf-8').read())
         self._word2idx = json.loads(
-            open(vocab_path + '.word2idx.json', 'r').read())
+            open(vocab_path + '.word2idx.json', 'r', encoding='utf-8').read())
         self._idx2word = {}
         for w, idx in self._word2idx.items():
             self._idx2word[idx] = w
diff --git a/setup.py b/setup.py
index eff2f8ba..d709dadc 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def get_hash():
 
 
 def get_version():
-    with open(version_file, 'r') as f:
+    with open(version_file, 'r', encoding='utf-8') as f:
         exec(compile(f.read(), version_file, 'exec'))
     return locals()['__version__']
 
@@ -109,7 +109,7 @@ def parse_requirements(fname='requirements.txt', with_version=True):
             yield info
 
     def parse_require_file(fpath):
-        with open(fpath, 'r') as f:
+        with open(fpath, 'r', encoding='utf-8') as f:
             for line in f.readlines():
                 line = line.strip()
                 if line.startswith('http'):
diff --git a/tests/run.py b/tests/run.py
index b286ecb5..0759379f 100644
--- a/tests/run.py
+++ b/tests/run.py
@@ -247,7 +247,7 @@ def run_in_subprocess(args):
         test_suite_env_map[test_suite_file] = 'default'
 
     if args.run_config is not None and Path(args.run_config).exists():
-        with open(args.run_config) as f:
+        with open(args.run_config, encoding='utf-8') as f:
             run_config = yaml.load(f, Loader=yaml.FullLoader)
         if 'isolated' in run_config:
             isolated_cases = run_config['isolated']
diff --git a/tests/trainers/easycv/test_easycv_trainer.py b/tests/trainers/easycv/test_easycv_trainer.py
index 4bd63c55..5d714097 100644
--- a/tests/trainers/easycv/test_easycv_trainer.py
+++ b/tests/trainers/easycv/test_easycv_trainer.py
@@ -109,7 +109,7 @@ class EasyCVTrainerTestSingleGpu(unittest.TestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
 
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
 
         self.assertDictContainsSubset(
@@ -185,7 +185,7 @@ class EasyCVTrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
 
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
 
         self.assertDictContainsSubset(
diff --git a/tests/trainers/test_trainer.py b/tests/trainers/test_trainer.py
index c73a56a3..5d466ee0 100644
--- a/tests/trainers/test_trainer.py
+++ b/tests/trainers/test_trainer.py
@@ -248,7 +248,7 @@ class TrainerTest(unittest.TestCase):
         results_files = os.listdir(self.tmp_dir)
 
         json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json')
-        with open(json_file, 'r') as f:
+        with open(json_file, 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
@@ -367,7 +367,7 @@ class TrainerTest(unittest.TestCase):
         trainer.train()
         results_files = os.listdir(self.tmp_dir)
         json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json')
-        with open(json_file, 'r') as f:
+        with open(json_file, 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
diff --git a/tests/trainers/test_trainer_gpu.py b/tests/trainers/test_trainer_gpu.py
index 0176704a..c003f3c9 100644
--- a/tests/trainers/test_trainer_gpu.py
+++ b/tests/trainers/test_trainer_gpu.py
@@ -142,7 +142,7 @@ class TrainerTestSingleGpu(unittest.TestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
 
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
@@ -236,7 +236,7 @@ class TrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
 
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
 
         self.assertDictContainsSubset(
@@ -320,7 +320,7 @@ class TrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
 
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
 
         print(results_files, lines)